paul-rogers commented on a change in pull request #2485:
URL: https://github.com/apache/drill/pull/2485#discussion_r830750559



##########
File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatConfig.java
##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text;
+
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.shaded.guava.com.google.common.base.Strings;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+
+import com.fasterxml.jackson.annotation.JsonAlias;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+
+@JsonTypeName(TextFormatPlugin.PLUGIN_NAME)
+@JsonInclude(Include.NON_DEFAULT)
+public class TextFormatConfig implements FormatPluginConfig {
+
+  public final List<String> extensions;
+  public final String lineDelimiter;
+  public final char fieldDelimiter;
+  public final char quote;
+  public final char escape;
+  public final char comment;
+  public final boolean skipFirstLine;
+  public final boolean extractHeader;
+
+  @JsonCreator
+  public TextFormatConfig(
+      @JsonProperty("extensions") List<String> extensions,
+      @JsonProperty("lineDelimiter") String lineDelimiter,
+      // Drill 1.17 and before used "delimiter" in the
+      // bootstrap storage plugins file, assume many instances
+      // exist in the field.
+      @JsonAlias("delimiter")

Review comment:
       To be honest, I don't even remember adding this, but I must have read up 
on Jackson at some point.

##########
File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatConfig.java
##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text;
+
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.shaded.guava.com.google.common.base.Strings;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+
+import com.fasterxml.jackson.annotation.JsonAlias;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+
+@JsonTypeName(TextFormatPlugin.PLUGIN_NAME)
+@JsonInclude(Include.NON_DEFAULT)
+public class TextFormatConfig implements FormatPluginConfig {
+
+  public final List<String> extensions;
+  public final String lineDelimiter;
+  public final char fieldDelimiter;
+  public final char quote;
+  public final char escape;
+  public final char comment;
+  public final boolean skipFirstLine;
+  public final boolean extractHeader;
+
+  @JsonCreator
+  public TextFormatConfig(
+      @JsonProperty("extensions") List<String> extensions,
+      @JsonProperty("lineDelimiter") String lineDelimiter,
+      // Drill 1.17 and before used "delimiter" in the
+      // bootstrap storage plugins file, assume many instances
+      // exist in the field.
+      @JsonAlias("delimiter")
+      @JsonProperty("fieldDelimiter") String fieldDelimiter,
+      @JsonProperty("quote") String quote,
+      @JsonProperty("escape") String escape,
+      @JsonProperty("comment") String comment,
+      @JsonProperty("skipFirstLine") Boolean skipFirstLine,
+      @JsonProperty("extractHeader") Boolean extractHeader) {
+    this.extensions = extensions == null ?
+        ImmutableList.of() : ImmutableList.copyOf(extensions);
+    this.lineDelimiter = lineDelimiter == null ? "\n" : lineDelimiter;
+    this.fieldDelimiter = Strings.isNullOrEmpty(fieldDelimiter) ? ',' : 
fieldDelimiter.charAt(0);
+    this.quote = Strings.isNullOrEmpty(quote) ? '"' : quote.charAt(0);
+    this.escape = Strings.isNullOrEmpty(escape) ? '"' : escape.charAt(0);
+    this.comment = Strings.isNullOrEmpty(comment) ? '#' : comment.charAt(0);
+    this.skipFirstLine = skipFirstLine == null ? false : skipFirstLine;
+    this.extractHeader = extractHeader == null ? false : extractHeader;
+  }
+
+  public TextFormatConfig() {

Review comment:
       Heck if I know: I just moved the code from one file to another. A search 
showed that the only reference is here:
   
   ```java
   
     public TextFormatPlugin(String name, DrillbitContext context, 
Configuration fsConf, StoragePluginConfig storageConfig) {
        this(name, context, fsConf, storageConfig, new TextFormatConfig());
     }
   ```
   
   Which itself has zero references.
   
   I removed both. Let's see if tests still pass.

##########
File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatPlugin.java
##########
@@ -81,9 +68,8 @@
  * to allow tight control of the size of produced batches (as well
  * as to support provided schema.)
  */
-public class TextFormatPlugin extends 
EasyFormatPlugin<TextFormatPlugin.TextFormatConfig> {
-
-  private final static String PLUGIN_NAME = "text";
+public class TextFormatPlugin extends EasyFormatPlugin<TextFormatConfig> {
+  final static String PLUGIN_NAME = "text";

Review comment:
       Drill was originally designed for big data: huge CSV files that get 
split across readers. That huge file might have a header, but more typically 
did not. The CSV reader still handles both cases: if the file has a header, it 
will seek to the beginning of the file even if the "file work" says to start at 
an offset of 256MB, etc.
   
   In your use case, you likely work with "small data" but, with much variety 
and you want to use Drill as a convenient way to work with such data sets. 
(Hence all the recent work on the REST API which is decidedly *not* a big data 
solution!)
   
   If we change the default, then any "big data" users will find their apps 
break if they still happen to use CSV files without headers.
   
   This might be something the team chooses to change in Drill 2.0 along with 
other breaking changes: shift the focus from old-style HDFS files to modern S3 
files are your case of small data-science style files.

##########
File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/TextFormatConfig.java
##########
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.text;
+
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.shaded.guava.com.google.common.base.Strings;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+
+import com.fasterxml.jackson.annotation.JsonAlias;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+
+@JsonTypeName(TextFormatPlugin.PLUGIN_NAME)
+@JsonInclude(Include.NON_DEFAULT)
+public class TextFormatConfig implements FormatPluginConfig {
+
+  public final List<String> extensions;
+  public final String lineDelimiter;
+  public final char fieldDelimiter;
+  public final char quote;
+  public final char escape;
+  public final char comment;
+  public final boolean skipFirstLine;
+  public final boolean extractHeader;
+
+  @JsonCreator
+  public TextFormatConfig(
+      @JsonProperty("extensions") List<String> extensions,
+      @JsonProperty("lineDelimiter") String lineDelimiter,
+      // Drill 1.17 and before used "delimiter" in the
+      // bootstrap storage plugins file, assume many instances
+      // exist in the field.
+      @JsonAlias("delimiter")
+      @JsonProperty("fieldDelimiter") String fieldDelimiter,
+      @JsonProperty("quote") String quote,
+      @JsonProperty("escape") String escape,
+      @JsonProperty("comment") String comment,
+      @JsonProperty("skipFirstLine") Boolean skipFirstLine,
+      @JsonProperty("extractHeader") Boolean extractHeader) {
+    this.extensions = extensions == null ?
+        ImmutableList.of() : ImmutableList.copyOf(extensions);
+    this.lineDelimiter = lineDelimiter == null ? "\n" : lineDelimiter;
+    this.fieldDelimiter = Strings.isNullOrEmpty(fieldDelimiter) ? ',' : 
fieldDelimiter.charAt(0);
+    this.quote = Strings.isNullOrEmpty(quote) ? '"' : quote.charAt(0);
+    this.escape = Strings.isNullOrEmpty(escape) ? '"' : escape.charAt(0);
+    this.comment = Strings.isNullOrEmpty(comment) ? '#' : comment.charAt(0);
+    this.skipFirstLine = skipFirstLine == null ? false : skipFirstLine;
+    this.extractHeader = extractHeader == null ? false : extractHeader;

Review comment:
       If we change this default, we possibly break production systems. I would 
suggest doing this in Drill 2.0 if the team feels that the old big-data use 
cases are no longer common.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@drill.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to