clintropolis commented on a change in pull request #9360: Create splits of 
multiple files for parallel indexing
URL: https://github.com/apache/druid/pull/9360#discussion_r381829041
 
 

 ##########
 File path: 
core/src/main/java/org/apache/druid/data/input/impl/LocalInputSource.java
 ##########
 @@ -34,28 +39,46 @@
 import org.apache.druid.data.input.InputSourceReader;
 import org.apache.druid.data.input.InputSplit;
 import org.apache.druid.data.input.SplitHintSpec;
+import org.apache.druid.java.util.common.IAE;
+import org.apache.druid.utils.CollectionUtils;
 import org.apache.druid.utils.Streams;
 
 import javax.annotation.Nullable;
 import java.io.File;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 public class LocalInputSource extends AbstractInputSource implements 
SplittableInputSource<List<File>>
 {
   private final File baseDir;
   private final String filter;
+  private final Set<File> files;
 
   @JsonCreator
   public LocalInputSource(
       @JsonProperty("baseDir") File baseDir,
-      @JsonProperty("filter") String filter
+      @JsonProperty("filter") String filter,
+      @JsonProperty("files") Set<File> files
   )
   {
-    this.baseDir = Preconditions.checkNotNull(baseDir, "baseDir");
-    this.filter = Preconditions.checkNotNull(filter, "filter");
+    this.baseDir = baseDir;
+    this.filter = baseDir != null ? Preconditions.checkNotNull(filter, 
"filter") : filter;
+    this.files = files;
+
+    if (baseDir == null && CollectionUtils.isNullOrEmpty(files)) {
+      throw new IAE("Either one of baseDir or files should be specified");
 
 Review comment:
   Is it better to accept both baseDir + filter and an explicit files list, or 
should you specify one or the other exclusively?
   
   If you think accepting both is better, then this exception message should 
probably say 'At least one of ...' instead of 'Either one of ...'.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to