This is an automated email from the ASF dual-hosted git repository.

shirshanka pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d32adb  [GOBBLIN-968] Honor file split size for HadoopFileInputSource
1d32adb is described below

commit 1d32adbe300ff3121cf2b826990c0616f625a27c
Author: ctodarka <[email protected]>
AuthorDate: Wed Dec 9 19:07:04 2020 -0800

    [GOBBLIN-968] Honor file split size for HadoopFileInputSource
    
    Set FileInputFormat's SPLIT_MINSIZE and SPLIT_MAXSIZE from the
    configs source.hadoop.file.input.split.minsize and
    source.hadoop.file.input.split.maxsize, respectively, when those
    configs are set.
    
    Closes #2816 from chiragtodarka/GOBBLIN-968
---
 .../gobblin/source/extractor/hadoop/HadoopFileInputSource.java | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/hadoop/HadoopFileInputSource.java b/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/hadoop/HadoopFileInputSource.java
index 98a5c52..34406ac 100644
--- a/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/hadoop/HadoopFileInputSource.java
+++ b/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/hadoop/HadoopFileInputSource.java
@@ -78,6 +78,8 @@ public abstract class HadoopFileInputSource<S, D, K, V> extends AbstractSource<S
   public static final String FILE_SPLITS_DESIRED_KEY = HADOOP_SOURCE_KEY_PREFIX + "file.splits.desired";
   public static final int DEFAULT_FILE_SPLITS_DESIRED = 1;
   public static final String FILE_INPUT_PATHS_KEY = HADOOP_SOURCE_KEY_PREFIX + "file.input.paths";
+  public static final String FILE_INPUT_SPLIT_MINSIZE = HADOOP_SOURCE_KEY_PREFIX + "file.input.split.minsize";
+  public static final String FILE_INPUT_SPLIT_MAXSIZE = HADOOP_SOURCE_KEY_PREFIX + "file.input.split.maxsize";
   public static final String FILE_INPUT_READ_KEYS_KEY = HADOOP_SOURCE_KEY_PREFIX + "file.read.keys";
   public static final boolean DEFAULT_FILE_INPUT_READ_KEYS = false;
   public static final String FILE_SPLIT_PATH_KEY = HADOOP_SOURCE_KEY_PREFIX + "file.split.path";
@@ -94,6 +96,14 @@ public abstract class HadoopFileInputSource<S, D, K, V> extends AbstractSource<S
         }
       }
 
+      if (state.contains(FILE_INPUT_SPLIT_MINSIZE)) {
+        FileInputFormat.setMinInputSplitSize(job, state.getPropAsLong(FILE_INPUT_SPLIT_MINSIZE));
+      }
+
+      if (state.contains(FILE_INPUT_SPLIT_MAXSIZE)) {
+        FileInputFormat.setMaxInputSplitSize(job, state.getPropAsLong(FILE_INPUT_SPLIT_MAXSIZE));
+      }
+
       FileInputFormat<K, V> fileInputFormat = getFileInputFormat(state, job.getConfiguration());
       List<InputSplit> fileSplits = fileInputFormat.getSplits(job);
       if (fileSplits == null || fileSplits.isEmpty()) {

Reply via email to