Repository: sqoop Updated Branches: refs/heads/trunk 5fc7a680f -> fdcbaf5c5
SQOOP-2399: BigDecimalSplitter java.lang.ArrayIndexOutOfBoundsException (Sowmya Ramesh via Venkat Ranganathan) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/fdcbaf5c Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/fdcbaf5c Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/fdcbaf5c Branch: refs/heads/trunk Commit: fdcbaf5c5356af0f86f35894ab87e26c2a6419b1 Parents: 5fc7a68 Author: Venkat Ranganathan <[email protected]> Authored: Fri Sep 18 12:12:07 2015 -0700 Committer: Venkat Ranganathan <[email protected]> Committed: Fri Sep 18 12:12:07 2015 -0700 ---------------------------------------------------------------------- .../sqoop/mapreduce/db/BigDecimalSplitter.java | 10 ++++++++-- .../org/apache/sqoop/mapreduce/db/DBSplitter.java | 3 ++- .../mapreduce/db/DataDrivenDBInputFormat.java | 10 +++++++--- .../apache/sqoop/mapreduce/db/TextSplitter.java | 10 ++++++++-- .../sqoop/mapreduce/db/TestTextSplitter.java | 17 +++++++++++++++-- 5 files changed, 40 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/fdcbaf5c/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java index ebe6c40..a8db2a3 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java +++ b/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.InputSplit; import com.cloudera.sqoop.config.ConfigurationHelper; import com.cloudera.sqoop.mapreduce.db.DBSplitter; import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; +import org.apache.sqoop.validation.ValidationException; /** * Implement DBSplitter over BigDecimal values. @@ -39,7 +40,7 @@ public class BigDecimalSplitter implements DBSplitter { private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class); public List<InputSplit> split(Configuration conf, ResultSet results, - String colName) throws SQLException { + String colName) throws SQLException, ValidationException { BigDecimal minVal = results.getBigDecimal(1); BigDecimal maxVal = results.getBigDecimal(2); @@ -140,7 +141,12 @@ public class BigDecimalSplitter implements DBSplitter { curVal = curVal.add(splitSize); } - if (splits.get(splits.size() - 1).compareTo(maxVal) != 0 + /* + * If the sort order and collation of the char columns differ we can have + * a situation where minVal > maxVal and splits can be empty list. + */ + + if ((splits.size() > 1 && splits.get(splits.size() - 1).compareTo(maxVal) != 0) || splits.size() == 1) { // We didn't end on the maxVal. Add that to the end of the list. splits.add(maxVal); http://git-wip-us.apache.org/repos/asf/sqoop/blob/fdcbaf5c/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java index b121d4b..9ea8caf 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java +++ b/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.sqoop.validation.ValidationException; /** * DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat. @@ -40,5 +41,5 @@ public interface DBSplitter { * type), determine a set of splits that span the given values. */ List<InputSplit> split(Configuration conf, ResultSet results, String colName) - throws SQLException; + throws SQLException, ValidationException; } http://git-wip-us.apache.org/repos/asf/sqoop/blob/fdcbaf5c/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java index db96e41..136b30a 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java +++ b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java @@ -51,7 +51,7 @@ import com.cloudera.sqoop.mapreduce.db.DateSplitter; import com.cloudera.sqoop.mapreduce.db.FloatSplitter; import com.cloudera.sqoop.mapreduce.db.IntegerSplitter; import com.cloudera.sqoop.mapreduce.db.TextSplitter; -import com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit; +import org.apache.sqoop.validation.ValidationException; /** * A InputFormat that reads input data from an SQL table. @@ -197,8 +197,12 @@ public class DataDrivenDBInputFormat<T extends DBWritable> + " type: " + sqlDataType); } - return splitter.split(job.getConfiguration(), results, - getDBConf().getInputOrderBy()); + try { + return splitter.split(job.getConfiguration(), results, + getDBConf().getInputOrderBy()); + } catch (ValidationException e) { + throw new IOException(e); + } } catch (SQLException e) { throw new IOException(e); } finally { http://git-wip-us.apache.org/repos/asf/sqoop/blob/fdcbaf5c/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java index d3085cd..9896d95 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java +++ b/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.InputSplit; import com.cloudera.sqoop.config.ConfigurationHelper; import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter; import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; +import org.apache.sqoop.validation.ValidationException; /** * Implement DBSplitter over text strings. @@ -59,7 +60,7 @@ public class TextSplitter extends BigDecimalSplitter { * points, then map the resulting floating point values back into strings. */ public List<InputSplit> split(Configuration conf, ResultSet results, - String colName) throws SQLException { + String colName) throws SQLException, ValidationException { LOG.warn("Generating splits for a textual index column."); LOG.warn("If your database sorts in a case-insensitive order, " @@ -146,11 +147,16 @@ public class TextSplitter extends BigDecimalSplitter { } public List<String> split(int numSplits, String minString, - String maxString, String commonPrefix) throws SQLException { + String maxString, String commonPrefix) throws SQLException, ValidationException { BigDecimal minVal = stringToBigDecimal(minString); BigDecimal maxVal = stringToBigDecimal(maxString); + + if (minVal.compareTo(maxVal) > 0) { + throw new ValidationException( minVal + " is greater than " + maxVal); + } + List<BigDecimal> splitPoints = split( new BigDecimal(numSplits), minVal, maxVal); List<String> splitStrings = new ArrayList<String>(); http://git-wip-us.apache.org/repos/asf/sqoop/blob/fdcbaf5c/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java index 9c007d3..5cfb0a5 100644 --- a/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java +++ b/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java @@ -24,6 +24,8 @@ import java.util.List; import com.cloudera.sqoop.mapreduce.db.TextSplitter; import junit.framework.TestCase; +import junit.framework.Test; +import org.apache.sqoop.validation.ValidationException; /** * Test that the TextSplitter implementation creates a sane set of splits. @@ -113,7 +115,7 @@ public class TestTextSplitter extends TestCase { assertEquals("AVeryLon", out); } - public void testAlphabetSplit() throws SQLException { + public void testAlphabetSplit() throws SQLException, ValidationException { // This should give us 25 splits, one per letter. TextSplitter splitter = new TextSplitter(); List<String> splits = splitter.split(25, "A", "Z", ""); @@ -123,7 +125,18 @@ public class TestTextSplitter extends TestCase { assertArrayEquals(expected, splits.toArray(new String [0])); } - public void testCommonPrefix() throws SQLException { + public void testAlphabetSplitWhenMinStringGreaterThanMaxString() throws SQLException { + TextSplitter splitter = new TextSplitter(); + try { + splitter.split(4, "Z", "A", ""); + fail(); + } catch (ValidationException e) { + // expected + assertTrue(true); + } + } + + public void testCommonPrefix() throws SQLException, ValidationException { // Splits between 'Hand' and 'Hardy' TextSplitter splitter = new TextSplitter(); List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
