Updated Branches: refs/heads/trunk 75f2b707b -> 5616152ac
SQOOP-603 Support small intervals in IntegerSplitter implementation (Jarek Jarcec Cecho via Cheolsoo Park) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5616152a Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5616152a Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5616152a Branch: refs/heads/trunk Commit: 5616152ac4c96d6c0589768b982cf67f3277df74 Parents: 75f2b70 Author: Cheolsoo Park <[email protected]> Authored: Thu Sep 20 12:14:15 2012 -0700 Committer: Cheolsoo Park <[email protected]> Committed: Thu Sep 20 12:14:15 2012 -0700 ---------------------------------------------------------------------- .../mapreduce/db/DataDrivenDBInputFormat.java | 3 ++ .../apache/sqoop/mapreduce/db/IntegerSplitter.java | 18 +++++++++++++++ .../sqoop/mapreduce/db/TestIntegerSplitter.java | 8 +++++- 3 files changed, 28 insertions(+), 1 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java index 35b74eb..6f4b208 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java +++ b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java @@ -319,6 +319,9 @@ public class DataDrivenDBInputFormat<T extends DBWritable> public DataDrivenDBInputSplit(final String lower, final String upper) { this.lowerBoundClause = lower; this.upperBoundClause = upper; + + LOG.debug("Creating input split with lower bound '" + lower + + "' and upper bound '" + upper + "'"); } /** http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java index 8e7a096..e6fefc6 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java +++ b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java @@ -141,6 +141,24 @@ public class IntegerSplitter implements DBSplitter { if (splits.size() == 1) { // make a valid singleton split splits.add(maxVal); + } else if ((maxVal - minVal) <= numSplits) { + // Edge case when there is lesser split points (intervals) then + // requested number of splits. In such case we are creating last split + // with two values, for example interval [1, 5] broken down into 5 + // splits will create following conditions: + // * 1 <= x < 2 + // * 2 <= x < 3 + // * 3 <= x < 4 + // * 4 <= x <= 5 + // Notice that the last split have twice more data than others. In + // those cases we add one maxVal at the end to create following splits + // instead: + // * 1 <= x < 2 + // * 2 <= x < 3 + // * 3 <= x < 4 + // * 4 <= x < 5 + // * 5 <= x <= 5 + splits.add(maxVal); } return splits; http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java index 22d5140..136afc7 100644 --- a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java +++ b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java @@ -102,7 +102,13 @@ public class TestIntegerSplitter extends TestCase { public void testTooManySplits() throws SQLException { List<Long> splits = new IntegerSplitter().split(5, 3, 5); - long [] expected = { 3, 4, 5 }; + long [] expected = { 3, 4, 5, 5}; + assertLongArrayEquals(expected, toLongArray(splits)); + } + + public void testExactSplitsAsInterval() throws SQLException { + List<Long> splits = new IntegerSplitter().split(5, 1, 5); + long [] expected = { 1, 2, 3, 4, 5, 5}; assertLongArrayEquals(expected, toLongArray(splits)); }
