hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)

2016-06-14 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/branch-1 370f5d799 -> 22a910b1f


HIVE-13841: Orc split generation returns different strategies with cache 
enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22a910b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22a910b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22a910b1

Branch: refs/heads/branch-1
Commit: 22a910b1fbf2539c3bfd35399dfa552d590a5f0b
Parents: 370f5d7
Author: Prasanth Jayachandran 
Authored: Tue Jun 14 18:49:21 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Tue Jun 14 18:49:21 2016 -0700

--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  7 ++++---
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/22a910b1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 2d6ef9a..9ac34b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -126,6 +126,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
+  private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;
 
   private static final PerfLogger perfLogger = PerfLogger.getPerfLogger();
   private static final String CLASS_NAME = ReaderImpl.class.getName();
@@ -434,7 +435,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 private final int numBuckets;
 private final long maxSize;
 private final long minSize;
-private final int minSplits;
+private final int etlFileThreshold;
 private final boolean footerInSplits;
 private final boolean cacheStripeDetails;
 private final AtomicInteger cacheHitCounter = new AtomicInteger(0);
@@ -469,7 +470,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
-  this.minSplits = Math.min(cacheStripeDetailsSize, minSplits);
+  this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits;
 
   synchronized (Context.class) {
 if (threadPool == null) {
@@ -748,7 +749,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 break;
   default:
 // HYBRID strategy
-if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
   splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas,
   covered);
 } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/22a910b1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index c0d912d..fa32bf6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -509,6 +509,27 @@ public class TestInputOutputFormat {
 }
   }
 }
+
+k = 0;
+conf.set("hive.orc.cache.stripe.details.size", "-1");
+for (int c : counts) {
+  for (int s : sizes) {
+final FileSystem fs = generateMockFiles(c, s);
+for (int n : numSplits) {
+  final OrcInputFormat.Context context = new OrcInputFormat.Context(
+  conf, n);
+  OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
+  context, fs, new MockPath(fs, "mock:/a/b"));
+  final SplitStrategy splitStrategy = gen.call();
+  assertTrue(
+  String.format(
+  "Split strategy for %d files x %d size for %d splits", c, s,
+  n),
+  splitStrategy.getClass().getSimpleName()
+  .equals(strategyResults[k++]));
+}
+  }
+}
   }
 
   @Test



hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)

2016-05-27 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/branch-2.1 e276929df -> a18b0225e


HIVE-13841: Orc split generation returns different strategies with cache 
enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a18b0225
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a18b0225
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a18b0225

Branch: refs/heads/branch-2.1
Commit: a18b0225ed20c5be2ab898c6ca941e4c1ab1e5f4
Parents: e276929
Author: Prasanth Jayachandran 
Authored: Fri May 27 16:41:50 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Fri May 27 16:42:35 2016 -0700

--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  7 ++++---
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a18b0225/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 33fe3b6..087207b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -155,6 +155,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
+  private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;
 
   /**
* When picking the hosts for a split that crosses block boundaries,
@@ -510,7 +511,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 private final int splitStrategyBatchMs;
 private final long maxSize;
 private final long minSize;
-private final int minSplits;
+private final int etlFileThreshold;
 private final boolean footerInSplits;
 private final boolean cacheStripeDetails;
 private final boolean forceThreadpool;
@@ -555,7 +556,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
-  this.minSplits = Math.min(cacheStripeDetailsSize, minSplits);
+  this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits;
 
   synchronized (Context.class) {
 if (threadPool == null) {
@@ -1938,7 +1939,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   deltas, covered, isOriginal, ugi, allowSyntheticFileIds);
 default:
   // HYBRID strategy
-  if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+  if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
 return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseOrOriginalFiles,
 deltas, covered, isOriginal, ugi, allowSyntheticFileIds);
   } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/a18b0225/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index c1ef0e7..52098ae 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -525,6 +525,27 @@ public class TestInputOutputFormat {
 }
   }
 }
+
+k = 0;
+conf.set("hive.orc.cache.stripe.details.size", "-1");
+for (int c : counts) {
+  for (int s : sizes) {
+final FileSystem fs = generateMockFiles(c, s);
+for (int n : numSplits) {
+  final OrcInputFormat.Context context = new OrcInputFormat.Context(
+  conf, n);
+  OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
+  context, fs, new MockPath(fs, "mock:/a/b"), false, null);
+  final SplitStrategy splitStrategy = createSplitStrategy(context, gen);
+  assertTrue(
+  String.format(
+  "Split strategy for %d files x %d size for %d splits", c, s,
+  n),
+  splitStrategy.getClass().getSimpleName()
+  .equals(strategyResults[k++]));
+}
+  }
+}
   }
 
   @Test



hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)

2016-05-27 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master 02b2fb5a9 -> 4e3da98d7


HIVE-13841: Orc split generation returns different strategies with cache 
enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4e3da98d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4e3da98d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4e3da98d

Branch: refs/heads/master
Commit: 4e3da98d7f05ae29c71bd379c3f59691588c0209
Parents: 02b2fb5
Author: Prasanth Jayachandran 
Authored: Fri May 27 16:41:50 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Fri May 27 16:41:50 2016 -0700

--
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  7 ++++---
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4e3da98d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 33fe3b6..087207b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -155,6 +155,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
+  private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;
 
   /**
* When picking the hosts for a split that crosses block boundaries,
@@ -510,7 +511,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 private final int splitStrategyBatchMs;
 private final long maxSize;
 private final long minSize;
-private final int minSplits;
+private final int etlFileThreshold;
 private final boolean footerInSplits;
 private final boolean cacheStripeDetails;
 private final boolean forceThreadpool;
@@ -555,7 +556,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
-  this.minSplits = Math.min(cacheStripeDetailsSize, minSplits);
+  this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits;
 
   synchronized (Context.class) {
 if (threadPool == null) {
@@ -1938,7 +1939,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   deltas, covered, isOriginal, ugi, allowSyntheticFileIds);
 default:
   // HYBRID strategy
-  if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+  if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
 return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseOrOriginalFiles,
 deltas, covered, isOriginal, ugi, allowSyntheticFileIds);
   } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/4e3da98d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
--
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index c1ef0e7..52098ae 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -525,6 +525,27 @@ public class TestInputOutputFormat {
 }
   }
 }
+
+k = 0;
+conf.set("hive.orc.cache.stripe.details.size", "-1");
+for (int c : counts) {
+  for (int s : sizes) {
+final FileSystem fs = generateMockFiles(c, s);
+for (int n : numSplits) {
+  final OrcInputFormat.Context context = new OrcInputFormat.Context(
+  conf, n);
+  OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
+  context, fs, new MockPath(fs, "mock:/a/b"), false, null);
+  final SplitStrategy splitStrategy = createSplitStrategy(context, gen);
+  assertTrue(
+  String.format(
+  "Split strategy for %d files x %d size for %d splits", c, s,
+  n),
+  splitStrategy.getClass().getSimpleName()
+  .equals(strategyResults[k++]));
+}
+  }
+}
   }
 
   @Test