This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 08b794b2b9 DRILL-8317: Convert LogRegex Format Plugin to EVF V2 (#2659)
08b794b2b9 is described below

commit 08b794b2b9775767cff4fb779ce9f1f964cc7a9c
Author: Charles S. Givre <[email protected]>
AuthorDate: Fri Sep 23 08:09:35 2022 -0400

    DRILL-8317: Convert LogRegex Format Plugin to EVF V2 (#2659)
---
 .../store/log => contrib/format-log}/README.md     |  0
 contrib/format-log/pom.xml                         | 83 ++++++++++++++++++++++
 .../drill/exec/store/log/LogBatchReader.java       | 39 ++++------
 .../drill/exec/store/log/LogFormatConfig.java      |  0
 .../drill/exec/store/log/LogFormatField.java       |  0
 .../drill/exec/store/log/LogFormatPlugin.java      | 32 ++++-----
 .../src/main/resources/drill-module.conf           | 23 ++++++
 .../apache/drill/exec/store/log/TestLogReader.java |  0
 .../drill/exec/store/log/TestLogReaderIssue.java   |  0
 .../src/test/resources/regex/baddates.log2         |  0
 .../src/test/resources/regex/firewall.ssdlog       |  0
 .../src/test/resources/regex/large.log1            |  0
 .../src/test/resources/regex/mysql.sqllog          |  0
 .../src/test/resources/regex/mysql.sqllog2         |  0
 .../src/test/resources/regex/simple.log1           |  0
 .../src/test/resources/regex/simple.log2           |  0
 contrib/pom.xml                                    |  1 +
 distribution/pom.xml                               |  5 ++
 distribution/src/assemble/component.xml            |  1 +
 19 files changed, 139 insertions(+), 45 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/README.md b/contrib/format-log/README.md
similarity index 100%
rename from exec/java-exec/src/main/java/org/apache/drill/exec/store/log/README.md
rename to contrib/format-log/README.md
diff --git a/contrib/format-log/pom.xml b/contrib/format-log/pom.xml
new file mode 100644
index 0000000000..aed06493e2
--- /dev/null
+++ b/contrib/format-log/pom.xml
@@ -0,0 +1,83 @@
+<?xml version="1.0"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>drill-contrib-parent</artifactId>
+    <groupId>org.apache.drill.contrib</groupId>
+    <version>2.0.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>drill-format-log</artifactId>
+  <name>Drill : Contrib : Format : Log Regex</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.drill.exec</groupId>
+      <artifactId>drill-java-exec</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <!-- Test dependencies -->
+    <dependency>
+      <groupId>org.apache.drill.exec</groupId>
+      <artifactId>drill-java-exec</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.drill</groupId>
+      <artifactId>drill-common</artifactId>
+      <classifier>tests</classifier>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-java-sources</id>
+            <phase>process-sources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${basedir}/target/classes/org/apache/drill/exec/store/log
+              </outputDirectory>
+              <resources>
+                <resource>
+                  <directory>src/main/java/org/apache/drill/exec/store/log</directory>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
 
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
similarity index 88%
rename from 
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
rename to 
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
index 1fd1ac3f2a..91e97a6dd7 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
+++ 
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogBatchReader.java
@@ -20,8 +20,9 @@ package org.apache.drill.exec.store.log;
 
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.exec.physical.impl.scan.convert.StandardConversions;
-import 
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
-import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileDescrip;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
 import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
 import org.apache.drill.exec.physical.resultSet.RowSetLoader;
 import org.apache.drill.exec.record.metadata.ColumnMetadata;
@@ -30,7 +31,6 @@ import org.apache.drill.exec.vector.accessor.ScalarWriter;
 import org.apache.drill.exec.vector.accessor.TupleWriter;
 import org.apache.drill.exec.vector.accessor.ValueWriter;
 import org.apache.drill.shaded.guava.com.google.common.base.Charsets;
-import org.apache.hadoop.mapred.FileSplit;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -41,7 +41,7 @@ import java.io.InputStreamReader;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-public class LogBatchReader implements ManagedReader<FileSchemaNegotiator> {
+public class LogBatchReader implements ManagedReader {
   private static final Logger logger = LoggerFactory.getLogger(LogBatchReader.class);
   public static final String RAW_LINE_COL_NAME = "_raw";
   public static final String UNMATCHED_LINE_COL_NAME = "_unmatched_rows";
@@ -126,8 +126,7 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
   }
 
   private final LogReaderConfig config;
-  private final int maxRecords;
-  private FileSplit split;
+  private final FileDescrip file;
   private BufferedReader reader;
   private ResultSetLoader loader;
   private VectorWriter vectorWriter;
@@ -137,19 +136,13 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
   private int lineNumber;
   private int errorCount;
 
-  public LogBatchReader(LogReaderConfig config, int maxRecords) {
+  public LogBatchReader(LogReaderConfig config, FileSchemaNegotiator negotiator) {
     this.config = config;
-    this.maxRecords = maxRecords;
-  }
-
-  @Override
-  public boolean open(FileSchemaNegotiator negotiator) {
-    split = negotiator.split();
+    this.file = negotiator.file();
     negotiator.tableSchema(config.tableSchema, true);
     loader = negotiator.build();
     bindColumns(loader.writer());
-    openFile(negotiator);
-    return true;
+    openFile();
   }
 
   private void bindColumns(RowSetLoader writer) {
@@ -188,15 +181,15 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
     }
   }
 
-  private void openFile(FileSchemaNegotiator negotiator) {
+  private void openFile() {
     InputStream in;
     try {
-      in = negotiator.fileSystem().openPossiblyCompressedStream(split.getPath());
+      in = file.fileSystem().openPossiblyCompressedStream(file.split().getPath());
     } catch (Exception e) {
       throw UserException
           .dataReadError(e)
           .message("Failed to open input file")
-          .addContext(String.format("File path: %s", split.getPath()))
+          .addContext(String.format("File path: %s", file.split().getPath()))
           .addContext(loader.errorContext())
           .build(logger);
     }
@@ -215,10 +208,6 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
   }
 
   private boolean nextLine(RowSetLoader rowWriter) {
-    if (rowWriter.limitReached(maxRecords)) {
-      return false;
-    }
-
     String line;
     try {
       line = reader.readLine();
@@ -226,7 +215,7 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
       throw UserException
           .dataReadError(e)
           .message("Error reading file")
-          .addContext(String.format("File: %s", split.getPath()))
+          .addContext(String.format("File: %s", file.split().getPath()))
           .addContext(loader.errorContext())
           .build(logger);
     }
@@ -278,7 +267,7 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
     try {
       reader.close();
     } catch (IOException e) {
-      logger.warn("Error when closing file: " + split.getPath(), e);
+      logger.warn("Error when closing file: " + file.split().getPath(), e);
     } finally {
       reader = null;
     }
@@ -288,6 +277,6 @@ public class LogBatchReader implements 
ManagedReader<FileSchemaNegotiator> {
   public String toString() {
     return String.format(
         "LogRecordReader[File=%s, Line=%d]",
-        split.getPath(), lineNumber);
+        file.split().getPath(), lineNumber);
   }
 }
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
 
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
similarity index 100%
rename from 
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
rename to 
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatConfig.java
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
 
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
similarity index 100%
rename from 
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
rename to 
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatField.java
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
 
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
similarity index 92%
rename from 
exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
rename to 
contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
index b80bed4052..c75efdf461 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
+++ 
b/contrib/format-log/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
@@ -18,23 +18,21 @@
 
 package org.apache.drill.exec.store.log;
 
-import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.logical.StoragePluginConfig;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.physical.impl.scan.columns.ColumnsScanFramework;
-import 
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory;
-import 
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileScanBuilder;
-import 
org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
-import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileReaderFactory;
+import 
org.apache.drill.exec.physical.impl.scan.v3.file.FileScanLifecycleBuilder;
+import org.apache.drill.exec.physical.impl.scan.v3.file.FileSchemaNegotiator;
 import org.apache.drill.exec.record.metadata.ColumnMetadata;
 import org.apache.drill.exec.record.metadata.MetadataUtils;
 import org.apache.drill.exec.record.metadata.Propertied;
 import org.apache.drill.exec.record.metadata.SchemaBuilder;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.server.DrillbitContext;
-import org.apache.drill.exec.server.options.OptionSet;
 import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
 import org.apache.drill.exec.store.dfs.easy.EasySubScan;
 import org.apache.drill.exec.store.log.LogBatchReader.LogReaderConfig;
@@ -61,16 +59,14 @@ public class LogFormatPlugin extends 
EasyFormatPlugin<LogFormatConfig> {
 
   private static class LogReaderFactory extends FileReaderFactory {
     private final LogReaderConfig readerConfig;
-    private final int maxRecords;
 
-    public LogReaderFactory(LogReaderConfig config, int maxRecords) {
+    public LogReaderFactory(LogReaderConfig config) {
       readerConfig = config;
-      this.maxRecords = maxRecords;
     }
 
     @Override
-    public ManagedReader<? extends FileSchemaNegotiator> newReader() {
-       return new LogBatchReader(readerConfig, maxRecords);
+    public ManagedReader newReader(FileSchemaNegotiator negotiator) {
+       return new LogBatchReader(readerConfig, negotiator);
     }
   }
 
@@ -91,7 +87,7 @@ public class LogFormatPlugin extends 
EasyFormatPlugin<LogFormatConfig> {
         .fsConf(fsConf)
         .defaultName(PLUGIN_NAME)
         .readerOperatorType(OPERATOR_TYPE)
-        .scanVersion(ScanFrameworkVersion.EVF_V1)
+        .scanVersion(ScanFrameworkVersion.EVF_V2)
         .supportsLimitPushdown(true)
         .build();
   }
@@ -143,8 +139,7 @@ public class LogFormatPlugin extends 
EasyFormatPlugin<LogFormatConfig> {
    * </ul>
    */
   @Override
-  protected FileScanBuilder frameworkBuilder(EasySubScan scan, OptionSet options) throws ExecutionSetupException {
-
+  protected void configureScan(FileScanLifecycleBuilder builder, EasySubScan scan) {
     // Pattern and schema identical across readers; define
     // up front.
     final TupleMetadata providedSchema = scan.getSchema();
@@ -169,8 +164,6 @@ public class LogFormatPlugin extends 
EasyFormatPlugin<LogFormatConfig> {
 
     // Use the file framework to enable support for implicit and partition
     // columns.
-    final FileScanBuilder builder = new FileScanBuilder();
-    initScanBuilder(builder, scan);
     if (hasSchema) {
       if (!hasColumns) {
 
@@ -207,14 +200,13 @@ public class LogFormatPlugin extends 
EasyFormatPlugin<LogFormatConfig> {
 
     // Pass along the class that will create a batch reader on demand for
     // each input file.
-    builder.setReaderFactory(new LogReaderFactory(
-        new LogReaderConfig(this, pattern, providedSchema, tableSchema,
-            readerSchema, !hasSchema, groupCount, maxErrors(providedSchema)), scan.getMaxRecords()));
+    builder.readerFactory(new LogReaderFactory(
+      new LogReaderConfig(this, pattern, providedSchema, tableSchema,
+        readerSchema, !hasSchema, groupCount, maxErrors(providedSchema))));
 
     // The default type of regex columns is nullable VarChar,
     // so let's use that as the missing column type.
     builder.nullType(Types.optional(MinorType.VARCHAR));
-    return builder;
   }
 
   /**
diff --git a/contrib/format-log/src/main/resources/drill-module.conf 
b/contrib/format-log/src/main/resources/drill-module.conf
new file mode 100644
index 0000000000..68df1ddac0
--- /dev/null
+++ b/contrib/format-log/src/main/resources/drill-module.conf
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#  This file tells Drill to consider this module when class path scanning.
+#  This file can also include any supplementary configuration information.
+#  This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
+
+drill.classpath.scanning.packages += "org.apache.drill.exec.store.log"
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
 
b/contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
similarity index 100%
rename from 
exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
rename to 
contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReader.java
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
 
b/contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
similarity index 100%
rename from 
exec/java-exec/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
rename to 
contrib/format-log/src/test/java/org/apache/drill/exec/store/log/TestLogReaderIssue.java
diff --git a/exec/java-exec/src/test/resources/regex/baddates.log2 
b/contrib/format-log/src/test/resources/regex/baddates.log2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/baddates.log2
rename to contrib/format-log/src/test/resources/regex/baddates.log2
diff --git a/exec/java-exec/src/test/resources/regex/firewall.ssdlog 
b/contrib/format-log/src/test/resources/regex/firewall.ssdlog
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/firewall.ssdlog
rename to contrib/format-log/src/test/resources/regex/firewall.ssdlog
diff --git a/exec/java-exec/src/test/resources/regex/large.log1 
b/contrib/format-log/src/test/resources/regex/large.log1
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/large.log1
rename to contrib/format-log/src/test/resources/regex/large.log1
diff --git a/exec/java-exec/src/test/resources/regex/mysql.sqllog 
b/contrib/format-log/src/test/resources/regex/mysql.sqllog
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/mysql.sqllog
rename to contrib/format-log/src/test/resources/regex/mysql.sqllog
diff --git a/exec/java-exec/src/test/resources/regex/mysql.sqllog2 
b/contrib/format-log/src/test/resources/regex/mysql.sqllog2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/mysql.sqllog2
rename to contrib/format-log/src/test/resources/regex/mysql.sqllog2
diff --git a/exec/java-exec/src/test/resources/regex/simple.log1 
b/contrib/format-log/src/test/resources/regex/simple.log1
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/simple.log1
rename to contrib/format-log/src/test/resources/regex/simple.log1
diff --git a/exec/java-exec/src/test/resources/regex/simple.log2 
b/contrib/format-log/src/test/resources/regex/simple.log2
similarity index 100%
rename from exec/java-exec/src/test/resources/regex/simple.log2
rename to contrib/format-log/src/test/resources/regex/simple.log2
diff --git a/contrib/pom.xml b/contrib/pom.xml
index d77bfa2a39..6fd6925aae 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -48,6 +48,7 @@
     <module>format-excel</module>
     <module>format-httpd</module>
     <module>format-esri</module>
+    <module>format-log</module>
     <module>format-pdf</module>
     <module>format-hdf5</module>
     <module>format-sas</module>
diff --git a/distribution/pom.xml b/distribution/pom.xml
index 60a2bb1d14..ce39807138 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -469,6 +469,11 @@
           <artifactId>drill-format-excel</artifactId>
           <version>${project.version}</version>
         </dependency>
+        <dependency>
+          <groupId>org.apache.drill.contrib</groupId>
+          <artifactId>drill-format-log</artifactId>
+          <version>${project.version}</version>
+        </dependency>
         <dependency>
           <groupId>org.apache.drill.contrib</groupId>
           <artifactId>drill-druid-storage</artifactId>
diff --git a/distribution/src/assemble/component.xml 
b/distribution/src/assemble/component.xml
index 6927b05ff0..38d79f01ae 100644
--- a/distribution/src/assemble/component.xml
+++ b/distribution/src/assemble/component.xml
@@ -49,6 +49,7 @@
         <include>org.apache.drill.contrib:drill-format-image:jar</include>
         <include>org.apache.drill.contrib:drill-format-pcapng:jar</include>
         <include>org.apache.drill.contrib:drill-format-hdf5:jar</include>
+        <include>org.apache.drill.contrib:drill-format-log:jar</include>
         <include>org.apache.drill.contrib:drill-format-ltsv:jar</include>
         <include>org.apache.drill.contrib:drill-format-httpd:jar</include>
         <include>org.apache.drill.contrib:drill-format-pdf:jar</include>

Reply via email to