Revert "HIVE-11043: ORC split strategies should adapt based on number of files (Gopal V reviewed by Prasanth Jayachandran)"

This reverts commit 5f78f9ef1e6c798849d34cc66721e6c1d9709b6f.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eb278d3c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eb278d3c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eb278d3c

Branch: refs/heads/beeline-cli
Commit: eb278d3c5a2ce6f11a3917f8646931630c1ee05e
Parents: 74a61e0
Author: Prasanth Jayachandran <[email protected]>
Authored: Tue Jun 23 20:50:39 2015 -0700
Committer: Prasanth Jayachandran <[email protected]>
Committed: Tue Jun 23 20:51:22 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   | 18 +---
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 97 +-------------------
 2 files changed, 4 insertions(+), 111 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/eb278d3c/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 62e6de7..5d6c9da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -374,7 +374,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     private final int numBuckets;
     private final long maxSize;
     private final long minSize;
-    private final int minSplits;
     private final boolean footerInSplits;
     private final boolean cacheStripeDetails;
     private final AtomicInteger cacheHitCounter = new AtomicInteger(0);
@@ -383,10 +382,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     private SplitStrategyKind splitStrategyKind;
 
     Context(Configuration conf) {
-      this(conf, 1);
-    }
-
-    Context(Configuration conf, final int minSplits) {
       this.conf = conf;
       minSize = conf.getLong(MIN_SPLIT_SIZE, DEFAULT_MIN_SPLIT_SIZE);
       maxSize = conf.getLong(MAX_SPLIT_SIZE, DEFAULT_MAX_SPLIT_SIZE);
@@ -409,8 +404,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
 
       cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
-      this.minSplits = Math.min(cacheStripeDetailsSize, minSplits);
-
       synchronized (Context.class) {
         if (threadPool == null) {
           threadPool = Executors.newFixedThreadPool(numThreads,
@@ -688,7 +681,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
             break;
           default:
             // HYBRID strategy
-            if (avgFileSize > context.maxSize || numFiles <= context.minSplits) {
+            if (avgFileSize > context.maxSize) {
               splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas,
                   covered);
             } else {
@@ -990,13 +983,8 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
 
   static List<OrcSplit> generateSplitsInfo(Configuration conf)
       throws IOException {
-    return generateSplitsInfo(conf, -1);
-  }
-
-  static List<OrcSplit> generateSplitsInfo(Configuration conf, int numSplits)
-      throws IOException {
     // use threads to resolve directories into splits
-    Context context = new Context(conf, numSplits);
+    Context context = new Context(conf);
     List<OrcSplit> splits = Lists.newArrayList();
     List<Future<?>> pathFutures = Lists.newArrayList();
     List<Future<?>> splitFutures = Lists.newArrayList();
@@ -1061,7 +1049,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
   public InputSplit[] getSplits(JobConf job,
                                 int numSplits) throws IOException {
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
-    List<OrcSplit> result = generateSplitsInfo(job, numSplits);
+    List<OrcSplit> result = generateSplitsInfo(job);
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ORC_GET_SPLITS);
     return result.toArray(new InputSplit[result.size()]);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/eb278d3c/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 12ae902..0246cd5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -23,11 +23,8 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.DataInput;
 import java.io.DataOutput;
-import java.io.File;
 import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.PrintWriter;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.sql.Date;
@@ -70,7 +67,6 @@ import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategyKind;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
@@ -398,97 +394,6 @@ public class TestInputOutputFormat {
         OrcInputFormat.getInputPaths(conf));
   }
 
-  private FileSystem generateMockFiles(final int count, final int size) {
-    final byte[] data = new byte[size];
-    MockFile[] files = new MockFile[count];
-    for (int i = 0; i < count; i++) {
-      files[i] = new MockFile(String.format("mock:/a/b/part-%d", i), size, data);
-    }
-    return new MockFileSystem(conf, files);
-  }
-
-  @Test
-  public void testSplitStrategySelection() throws Exception {
-
-    conf.set("mapreduce.input.fileinputformat.split.maxsize", "500");
-    conf.setLong(HiveConf.ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE.varname,
-        100);
-    final int[] counts = { 1, 10, 100, 256 };
-    final int[] sizes = { 100, 1000 };
-    final int[] numSplits = { 1, 9, 10, 11, 99, 111 };
-    final String[] strategyResults = new String[] {
-    "ETLSplitStrategy", /* 1 files x 100 size for 1 splits */
-    "ETLSplitStrategy", /* 1 files x 100 size for 9 splits */
-    "ETLSplitStrategy", /* 1 files x 100 size for 10 splits */
-    "ETLSplitStrategy", /* 1 files x 100 size for 11 splits */
-    "ETLSplitStrategy", /* 1 files x 100 size for 99 splits */
-    "ETLSplitStrategy", /* 1 files x 100 size for 111 splits */
-    "ETLSplitStrategy", /* 1 files x 1000 size for 1 splits */
-    "ETLSplitStrategy", /* 1 files x 1000 size for 9 splits */
-    "ETLSplitStrategy", /* 1 files x 1000 size for 10 splits */
-    "ETLSplitStrategy", /* 1 files x 1000 size for 11 splits */
-    "ETLSplitStrategy", /* 1 files x 1000 size for 99 splits */
-    "ETLSplitStrategy", /* 1 files x 1000 size for 111 splits */
-    "BISplitStrategy", /* 10 files x 100 size for 1 splits */
-    "BISplitStrategy", /* 10 files x 100 size for 9 splits */
-    "ETLSplitStrategy", /* 10 files x 100 size for 10 splits */
-    "ETLSplitStrategy", /* 10 files x 100 size for 11 splits */
-    "ETLSplitStrategy", /* 10 files x 100 size for 99 splits */
-    "ETLSplitStrategy", /* 10 files x 100 size for 111 splits */
-    "ETLSplitStrategy", /* 10 files x 1000 size for 1 splits */
-    "ETLSplitStrategy", /* 10 files x 1000 size for 9 splits */
-    "ETLSplitStrategy", /* 10 files x 1000 size for 10 splits */
-    "ETLSplitStrategy", /* 10 files x 1000 size for 11 splits */
-    "ETLSplitStrategy", /* 10 files x 1000 size for 99 splits */
-    "ETLSplitStrategy", /* 10 files x 1000 size for 111 splits */
-    "BISplitStrategy", /* 100 files x 100 size for 1 splits */
-    "BISplitStrategy", /* 100 files x 100 size for 9 splits */
-    "BISplitStrategy", /* 100 files x 100 size for 10 splits */
-    "BISplitStrategy", /* 100 files x 100 size for 11 splits */
-    "BISplitStrategy", /* 100 files x 100 size for 99 splits */
-    "ETLSplitStrategy", /* 100 files x 100 size for 111 splits */
-    "ETLSplitStrategy", /* 100 files x 1000 size for 1 splits */
-    "ETLSplitStrategy", /* 100 files x 1000 size for 9 splits */
-    "ETLSplitStrategy", /* 100 files x 1000 size for 10 splits */
-    "ETLSplitStrategy", /* 100 files x 1000 size for 11 splits */
-    "ETLSplitStrategy", /* 100 files x 1000 size for 99 splits */
-    "ETLSplitStrategy", /* 100 files x 1000 size for 111 splits */
-    "BISplitStrategy", /* 256 files x 100 size for 1 splits */
-    "BISplitStrategy", /* 256 files x 100 size for 9 splits */
-    "BISplitStrategy", /* 256 files x 100 size for 10 splits */
-    "BISplitStrategy", /* 256 files x 100 size for 11 splits */
-    "BISplitStrategy", /* 256 files x 100 size for 99 splits */
-    "BISplitStrategy", /* 256 files x 100 size for 111 splits */
-    "ETLSplitStrategy", /* 256 files x 1000 size for 1 splits */
-    "ETLSplitStrategy", /* 256 files x 1000 size for 9 splits */
-    "ETLSplitStrategy", /* 256 files x 1000 size for 10 splits */
-    "ETLSplitStrategy", /* 256 files x 1000 size for 11 splits */
-    "ETLSplitStrategy", /* 256 files x 1000 size for 99 splits */
-    "ETLSplitStrategy", /* 256 files x 1000 size for 111 splits */
-    };
-
-    int k = 0;
-
-    for (int c : counts) {
-      for (int s : sizes) {
-        final FileSystem fs = generateMockFiles(c, s);
-        for (int n : numSplits) {
-          final OrcInputFormat.Context context = new OrcInputFormat.Context(
-              conf, n);
-          OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
-              context, fs, new MockPath(fs, "mock:/a/b"));
-          final SplitStrategy splitStrategy = gen.call();
-          assertTrue(
-              String.format(
-                  "Split strategy for %d files x %d size for %d splits", c, s,
-                  n),
-              splitStrategy.getClass().getSimpleName()
-                  .equals(strategyResults[k++]));
-        }
-      }
-    }
-  }
-
   @Test
   public void testFileGenerator() throws Exception {
     OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
@@ -1210,7 +1115,7 @@ public class TestInputOutputFormat {
     InputFormat<?,?> in = new OrcInputFormat();
     FileInputFormat.setInputPaths(conf, testFilePath.toString());
     InputSplit[] splits = in.getSplits(conf, 1);
-    assertTrue(0 == splits.length);
+    assertTrue(1 == splits.length);
     assertEquals(null, serde.getSerDeStats());
   }
 

Reply via email to