Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 2d54e26a0 -> 7631d2933
SQOOP-1936: Sqoop2: Sort by comparing IDF data in shuffle phase (Veena Basavaraj via Abraham Elmahrek) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/7631d293 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/7631d293 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/7631d293 Branch: refs/heads/sqoop2 Commit: 7631d293335855fd42a319c1fcb56530c27f78a4 Parents: 2d54e26 Author: Abraham Elmahrek <[email protected]> Authored: Mon Jan 12 21:16:13 2015 -0800 Committer: Abraham Elmahrek <[email protected]> Committed: Mon Jan 12 21:16:13 2015 -0800 ---------------------------------------------------------------------- .../connector/idf/CSVIntermediateDataFormat.java | 7 +++---- .../sqoop/connector/idf/IntermediateDataFormat.java | 16 +++++++++++++++- .../connector/idf/JSONIntermediateDataFormat.java | 4 ++++ .../idf/TestCSVIntermediateDataFormat.java | 3 +-- .../java/org/apache/sqoop/job/io/SqoopWritable.java | 4 ++-- .../org/apache/sqoop/job/io/TestSqoopWritable.java | 9 --------- 6 files changed, 25 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java index 2af6acd..4870fae 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java @@ -58,8 +58,7 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { */ @Override public 
String getCSVTextData() { - // TODO:SQOOP-1936 to enable schema validation after we use compareTo - return this.data; + return super.getData(); } /** @@ -81,14 +80,14 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { if (csvStringArray == null) { return null; } + Column[] columns = schema.getColumnsArray(); - if (csvStringArray.length != schema.getColumnsArray().length) { + if (csvStringArray.length != columns.length) { throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001, "The data " + getCSVTextData() + " has the wrong number of fields."); } Object[] objectArray = new Object[csvStringArray.length]; - Column[] columns = schema.getColumnsArray(); for (int i = 0; i < csvStringArray.length; i++) { if (csvStringArray[i].equals(NULL_VALUE) && !columns[i].isNullable()) { throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005, http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java index 6063320..6f945c2 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java @@ -44,10 +44,13 @@ import java.util.Set; * Any conversion to the format dictated by the corresponding data source from the native or CSV text format * has to be done by the connector themselves both in FROM and TO * + * NOTE: we cannot use the generic for comparable, since the comparison can be arbitrary for instance, + * purely based on text format * @param <T> - Each data format may have a native 
representation of the * data, represented by the parameter. */ -public abstract class IntermediateDataFormat<T> { +@SuppressWarnings("rawtypes") +public abstract class IntermediateDataFormat<T> implements Comparable { protected volatile T data; @@ -203,4 +206,15 @@ public abstract class IntermediateDataFormat<T> { return true; } + @Override + public String toString() { + return this.data.toString(); + } + + @Override + public int compareTo(Object o) { + IntermediateDataFormat<?> idf = (IntermediateDataFormat<?>) o; + return toString().compareTo(idf.toString()); + } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java index 3cfd356..c8df6e0 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java @@ -419,4 +419,8 @@ public class JSONIntermediateDataFormat extends IntermediateDataFormat<JSONObjec return object; } + @Override + public String toString() { + return this.data.toJSONString(); + } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java index 861d34e..d2b0ae0 100644 --- 
a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java @@ -1143,8 +1143,7 @@ public class TestCSVIntermediateDataFormat { dataFormat.getData(); } - //SQOOP-1936 to enable schema validation after we use compareTo - @Test + @Test(expectedExceptions = SqoopException.class) public void testNotSettingSchemaAndGetCSVData() { dataFormat = new CSVIntermediateDataFormat(); dataFormat.getCSVTextData(); http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java ---------------------------------------------------------------------- diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java index 08c2031..59ad311 100644 --- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java +++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java @@ -68,12 +68,12 @@ public class SqoopWritable implements Configurable, WritableComparable<SqoopWrit @Override public int compareTo(SqoopWritable o) { - return toString().compareTo(o.toString()); + return toIDF.compareTo(o.toIDF); } @Override public String toString() { - return toIDF.getCSVTextData(); + return toIDF.toString(); } @Override http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java ---------------------------------------------------------------------- diff --git a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java index 6a14201..452e085 100644 --- a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java +++ 
b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java @@ -51,15 +51,6 @@ public class TestSqoopWritable { } @Test - public void testStringInStringOut() { - String testData = "Live Long and prosper"; - writable.setString(testData); - verify(idfMock, times(1)).setCSVTextData(testData); - writable.toString(); - verify(idfMock, times(1)).getCSVTextData(); - } - - @Test public void testWrite() throws IOException { String testData = "One ring to rule them all"; ByteArrayOutputStream ostream = new ByteArrayOutputStream();
