hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/branch-1 370f5d799 -> 22a910b1f HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22a910b1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22a910b1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22a910b1 Branch: refs/heads/branch-1 Commit: 22a910b1fbf2539c3bfd35399dfa552d590a5f0b Parents: 370f5d7 Author: Prasanth Jayachandran Authored: Tue Jun 14 18:49:21 2016 -0700 Committer: Prasanth Jayachandran Committed: Tue Jun 14 18:49:21 2016 -0700 -- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 ++++--- .../hive/ql/io/orc/TestInputOutputFormat.java | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/22a910b1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 2d6ef9a..9ac34b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -126,6 +126,7 @@ public class OrcInputFormat implements InputFormat , private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; + private static final int DEFAULT_ETL_FILE_THRESHOLD = 100; private static final PerfLogger perfLogger = PerfLogger.getPerfLogger(); private static final String CLASS_NAME = ReaderImpl.class.getName(); @@ -434,7 +435,7 @@ public class OrcInputFormat implements InputFormat , private final int numBuckets; private final long maxSize; private final long minSize; -private final int minSplits; +private final int etlFileThreshold; private final boolean footerInSplits; 
private final boolean cacheStripeDetails; private final AtomicInteger cacheHitCounter = new AtomicInteger(0); @@ -469,7 +470,7 @@ public class OrcInputFormat implements InputFormat , cacheStripeDetails = (cacheStripeDetailsSize > 0); - this.minSplits = Math.min(cacheStripeDetailsSize, minSplits); + this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits; synchronized (Context.class) { if (threadPool == null) { @@ -748,7 +749,7 @@ public class OrcInputFormat implements InputFormat , break; default: // HYBRID strategy -if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) { +if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) { splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas, covered); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/22a910b1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index c0d912d..fa32bf6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -509,6 +509,27 @@ public class TestInputOutputFormat { } } } + +k = 0; +conf.set("hive.orc.cache.stripe.details.size", "-1"); +for (int c : counts) { + for (int s : sizes) { +final FileSystem fs = generateMockFiles(c, s); +for (int n : numSplits) { + final OrcInputFormat.Context context = new OrcInputFormat.Context( + conf, n); + OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( + context, fs, new MockPath(fs, "mock:/a/b")); + final SplitStrategy splitStrategy = gen.call(); + assertTrue( + String.format( + "Split strategy for %d files x %d size for %d splits", c, s, + n), + splitStrategy.getClass().getSimpleName() + .equals(strategyResults[k++])); +} + } +} } @Test
hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/branch-2.1 e276929df -> a18b0225e HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a18b0225 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a18b0225 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a18b0225 Branch: refs/heads/branch-2.1 Commit: a18b0225ed20c5be2ab898c6ca941e4c1ab1e5f4 Parents: e276929 Author: Prasanth Jayachandran Authored: Fri May 27 16:41:50 2016 -0700 Committer: Prasanth Jayachandran Committed: Fri May 27 16:42:35 2016 -0700 -- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 ++++--- .../hive/ql/io/orc/TestInputOutputFormat.java | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a18b0225/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 33fe3b6..087207b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -155,6 +155,7 @@ public class OrcInputFormat implements InputFormat , private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; + private static final int DEFAULT_ETL_FILE_THRESHOLD = 100; /** * When picking the hosts for a split that crosses block boundaries, @@ -510,7 +511,7 @@ public class OrcInputFormat implements InputFormat , private final int splitStrategyBatchMs; private final long maxSize; private final long minSize; -private final int minSplits; +private final int etlFileThreshold; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final 
boolean forceThreadpool; @@ -555,7 +556,7 @@ public class OrcInputFormat implements InputFormat , cacheStripeDetails = (cacheStripeDetailsSize > 0); - this.minSplits = Math.min(cacheStripeDetailsSize, minSplits); + this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits; synchronized (Context.class) { if (threadPool == null) { @@ -1938,7 +1939,7 @@ public class OrcInputFormat implements InputFormat , deltas, covered, isOriginal, ugi, allowSyntheticFileIds); default: // HYBRID strategy - if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) { + if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) { return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseOrOriginalFiles, deltas, covered, isOriginal, ugi, allowSyntheticFileIds); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/a18b0225/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index c1ef0e7..52098ae 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -525,6 +525,27 @@ public class TestInputOutputFormat { } } } + +k = 0; +conf.set("hive.orc.cache.stripe.details.size", "-1"); +for (int c : counts) { + for (int s : sizes) { +final FileSystem fs = generateMockFiles(c, s); +for (int n : numSplits) { + final OrcInputFormat.Context context = new OrcInputFormat.Context( + conf, n); + OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( + context, fs, new MockPath(fs, "mock:/a/b"), false, null); + final SplitStrategy splitStrategy = createSplitStrategy(context, gen); + assertTrue( + String.format( + "Split strategy for %d files x %d size for %d splits", c, s, + n), + splitStrategy.getClass().getSimpleName() + 
.equals(strategyResults[k++])); +} + } +} } @Test
hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 02b2fb5a9 -> 4e3da98d7 HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4e3da98d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4e3da98d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4e3da98d Branch: refs/heads/master Commit: 4e3da98d7f05ae29c71bd379c3f59691588c0209 Parents: 02b2fb5 Author: Prasanth Jayachandran Authored: Fri May 27 16:41:50 2016 -0700 Committer: Prasanth Jayachandran Committed: Fri May 27 16:41:50 2016 -0700 -- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 ++++--- .../hive/ql/io/orc/TestInputOutputFormat.java | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/4e3da98d/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 33fe3b6..087207b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -155,6 +155,7 @@ public class OrcInputFormat implements InputFormat , private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024; private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024; + private static final int DEFAULT_ETL_FILE_THRESHOLD = 100; /** * When picking the hosts for a split that crosses block boundaries, @@ -510,7 +511,7 @@ public class OrcInputFormat implements InputFormat , private final int splitStrategyBatchMs; private final long maxSize; private final long minSize; -private final int minSplits; +private final int etlFileThreshold; private final boolean footerInSplits; private final boolean cacheStripeDetails; private final boolean 
forceThreadpool; @@ -555,7 +556,7 @@ public class OrcInputFormat implements InputFormat , cacheStripeDetails = (cacheStripeDetailsSize > 0); - this.minSplits = Math.min(cacheStripeDetailsSize, minSplits); + this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits; synchronized (Context.class) { if (threadPool == null) { @@ -1938,7 +1939,7 @@ public class OrcInputFormat implements InputFormat , deltas, covered, isOriginal, ugi, allowSyntheticFileIds); default: // HYBRID strategy - if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) { + if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) { return combineOrCreateETLStrategy(combinedCtx, context, fs, dir, baseOrOriginalFiles, deltas, covered, isOriginal, ugi, allowSyntheticFileIds); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/4e3da98d/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index c1ef0e7..52098ae 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -525,6 +525,27 @@ public class TestInputOutputFormat { } } } + +k = 0; +conf.set("hive.orc.cache.stripe.details.size", "-1"); +for (int c : counts) { + for (int s : sizes) { +final FileSystem fs = generateMockFiles(c, s); +for (int n : numSplits) { + final OrcInputFormat.Context context = new OrcInputFormat.Context( + conf, n); + OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator( + context, fs, new MockPath(fs, "mock:/a/b"), false, null); + final SplitStrategy splitStrategy = createSplitStrategy(context, gen); + assertTrue( + String.format( + "Split strategy for %d files x %d size for %d splits", c, s, + n), + splitStrategy.getClass().getSimpleName() + 
.equals(strategyResults[k++])); +} + } +} } @Test