[ https://issues.apache.org/jira/browse/DRILL-7293?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16862959#comment-16862959 ]

ASF GitHub Bot commented on DRILL-7293:
---------------------------------------

arina-ielchiieva commented on pull request #1807: DRILL-7293: Convert the regex 
("log") plugin to use EVF
URL: https://github.com/apache/drill/pull/1807#discussion_r293323694
 
 

 ##########
 File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/log/LogFormatPlugin.java
 ##########
 @@ -18,86 +18,224 @@
 
 package org.apache.drill.exec.store.log;
 
-import java.io.IOException;
-import org.apache.drill.exec.planner.common.DrillStatsTable;
-import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
 import org.apache.drill.common.exceptions.ExecutionSetupException;
-import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.logical.StoragePluginConfig;
-import org.apache.drill.exec.ops.FragmentContext;
-import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileScanBuilder;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
+import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.proto.UserBitShared.CoreOperatorType;
+import org.apache.drill.exec.record.metadata.ColumnMetadata;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.server.DrillbitContext;
-import org.apache.drill.exec.store.RecordReader;
-import org.apache.drill.exec.store.RecordWriter;
-import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.server.options.OptionManager;
 import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
-import org.apache.drill.exec.store.dfs.easy.EasyWriter;
-import org.apache.drill.exec.store.dfs.easy.FileWork;
+import org.apache.drill.exec.store.dfs.easy.EasySubScan;
+import org.apache.drill.shaded.guava.com.google.common.base.Strings;
+import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 
-import java.util.List;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
 public class LogFormatPlugin extends EasyFormatPlugin<LogFormatConfig> {
+  public static final String PLUGIN_NAME = "logRegex";
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(LogFormatPlugin.class);
+
+  private static class LogReaderFactory extends FileReaderFactory {
+    private final LogFormatPlugin plugin;
+    private final Pattern pattern;
+    private final TupleMetadata schema;
+
+    public LogReaderFactory(LogFormatPlugin plugin, Pattern pattern, TupleMetadata schema) {
+      this.plugin = plugin;
+      this.pattern = pattern;
+      this.schema = schema;
+    }
 
-  public static final String DEFAULT_NAME = "logRegex";
-  private final LogFormatConfig formatConfig;
+    @Override
+    public ManagedReader<? extends FileSchemaNegotiator> newReader() {
+       return new LogBatchReader(plugin.getConfig(), pattern, schema);
+    }
+  }
 
   public LogFormatPlugin(String name, DrillbitContext context,
                         Configuration fsConf, StoragePluginConfig storageConfig,
                          LogFormatConfig formatConfig) {
-    super(name, context, fsConf, storageConfig, formatConfig,
-        true,  // readable
-        false, // writable
-        true, // blockSplittable
-        true,  // compressible
-        Lists.newArrayList(formatConfig.getExtension()),
-        DEFAULT_NAME);
-    this.formatConfig = formatConfig;
+    super(name, easyConfig(fsConf, formatConfig), context, storageConfig, formatConfig);
   }
 
-  @Override
-  public RecordReader getRecordReader(FragmentContext context,
-                                      DrillFileSystem dfs, FileWork fileWork, List<SchemaPath> columns,
-                                      String userName) throws ExecutionSetupException {
-    return new LogRecordReader(context, dfs, fileWork,
-        columns, userName, formatConfig);
+  private static EasyFormatConfig easyConfig(Configuration fsConf, LogFormatConfig pluginConfig) {
+    EasyFormatConfig config = new EasyFormatConfig();
+    config.readable = true;
+    config.writable = false;
+    // Should be block splittable, but the logic is not yet implemented.
+    config.blockSplittable = false;
+    config.compressible = true;
+    config.supportsProjectPushdown = true;
+    config.extensions = Lists.newArrayList(pluginConfig.getExtension());
+    config.fsConf = fsConf;
+    config.defaultName = PLUGIN_NAME;
+    config.readerOperatorType = CoreOperatorType.REGEX_SUB_SCAN_VALUE;
+    config.useEnhancedScan = true;
+    return config;
   }
 
   @Override
-  public boolean supportsPushDown() {
-    return true;
-  }
+  protected FileScanBuilder frameworkBuilder(
+      OptionManager options, EasySubScan scan) throws ExecutionSetupException {
 
-  @Override
-  public RecordWriter getRecordWriter(FragmentContext context,
-                                      EasyWriter writer) throws UnsupportedOperationException {
-    throw new UnsupportedOperationException("unimplemented");
-  }
+    // Pattern and schema identical across readers; define
+    // up front.
 
-  @Override
-  public int getReaderOperatorType() {
-    return UserBitShared.CoreOperatorType.REGEX_SUB_SCAN_VALUE;
+    Pattern pattern = setupPattern();
+    Matcher m = pattern.matcher("test");
 
 Review comment:
   Why are we matching `test` here?
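   My guess is that the throwaway matcher exists only to read the capture-group count: `Matcher.groupCount()` is a property of the compiled pattern and never looks at the input string, so `"test"` (or `""`) gives the same answer. A small self-contained illustration, with a made-up pattern:

   import java.util.regex.Pattern;

   public class GroupCountDemo {
     public static void main(String[] args) {
       // A made-up pattern with four capturing groups.
       Pattern pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2}) (\\w+)");
       // groupCount() never runs the match; it only counts the pattern's
       // capturing groups, so any throwaway input works.
       System.out.println(pattern.matcher("test").groupCount());   // prints 4
     }
   }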
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Convert the regex ("log") plugin to use EVF
> -------------------------------------------
>
>                 Key: DRILL-7293
>                 URL: https://issues.apache.org/jira/browse/DRILL-7293
>             Project: Apache Drill
>          Issue Type: Improvement
>    Affects Versions: 1.16.0
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>            Priority: Major
>             Fix For: 1.17.0
>
>
> The "log" plugin (which uses a regex to define the row format) is the subject 
> of Chapter 12 of the Learning Apache Drill book (though the version in the 
> book is simpler than the one in the master branch).
> The recently-completed "Enhanced Vector Framework" (EVF, AKA the "row set 
> framework") gives Drill control over the size of batches created by readers, 
> and allows readers to use the recently-added provided schema mechanism.
> We wish to use the log reader as an example for how to convert a Drill format 
> plugin to use the EVF so that other developers can convert their own plugins.
> This PR provides the first set of log plugin changes to enable us to publish 
> a tutorial on the EVF.
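For anyone following the tutorial angle, the overall shape of an EVF batch reader is roughly the sketch below. Class names are taken from the imports in the diff above; the SketchBatchReader name and the one-column VARCHAR schema are invented for illustration, and the ResultSetLoader/RowSetLoader package paths reflect the 1.16-era EVF and may differ in later Drill versions.

   import org.apache.drill.common.types.TypeProtos.MinorType;
   import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
   import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
   import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
   import org.apache.drill.exec.physical.rowSet.RowSetLoader;
   import org.apache.drill.exec.record.metadata.SchemaBuilder;
   import org.apache.drill.exec.record.metadata.TupleMetadata;

   // Sketch only: not the actual LogBatchReader from this PR.
   public class SketchBatchReader implements ManagedReader<FileSchemaNegotiator> {

     private ResultSetLoader loader;

     @Override
     public boolean open(FileSchemaNegotiator negotiator) {
       // Declare the schema up front; EVF hands back a loader that
       // writes rows into batches whose size EVF controls.
       TupleMetadata schema = new SchemaBuilder()
           .addNullable("line", MinorType.VARCHAR)
           .buildSchema();
       negotiator.setTableSchema(schema, true);
       loader = negotiator.build();
       return true;                  // false would mean "no data to read"
     }

     @Override
     public boolean next() {
       RowSetLoader writer = loader.writer();
       while (!writer.isFull()) {    // EVF, not the reader, decides batch size
         // A real reader parses one input line here, then:
         //   writer.start(); writer.scalar(0).setString(line); writer.save();
         // and returns false once the input is exhausted.
         return false;               // placeholder: this sketch has no input
       }
       return true;                  // batch full; EVF will call next() again
     }

     @Override
     public void close() { }         // release the input file here
   }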



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
