Repository: sqoop Updated Branches: refs/heads/sqoop2 c19f9c946 -> c865aefea
SQOOP-1817: Sqoop2: Update CSVIntermediate BIT data type (Veena Basavaraj via Abraham Elmahrek) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/c865aefe Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/c865aefe Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/c865aefe Branch: refs/heads/sqoop2 Commit: c865aefea62a8f0f5eabe56860b393538f03c09c Parents: c19f9c9 Author: Abraham Elmahrek <[email protected]> Authored: Fri Dec 5 15:57:39 2014 -0800 Committer: Abraham Elmahrek <[email protected]> Committed: Fri Dec 5 15:57:39 2014 -0800 ---------------------------------------------------------------------- .../idf/CSVIntermediateDataFormat.java | 57 ++++++---- .../idf/IntermediateDataFormatError.java | 9 +- .../idf/TestCSVIntermediateDataFormat.java | 106 +++++++++++++++++-- 3 files changed, 144 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/c865aefe/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java index d481cce..daa51eb 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java @@ -44,6 +44,7 @@ import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -89,9 +90,10 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { // http://www.joda.org/joda-time/key_format.html provides details on the formatter token static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ"); static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd"); - static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ"); + static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSS"); private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>(); + private final List<Integer> bitTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> listTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>(); @@ -99,6 +101,11 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> timeColumnIndices = new ArrayList<Integer>(); + static final String[] TRUE_BIT_VALUES = new String[] { "1", "true", "TRUE" }; + static final Set<String> TRUE_BIT_SET = new HashSet<String>(Arrays.asList(TRUE_BIT_VALUES)); + static final String[] FALSE_BIT_VALUES = new String[] { "0", "false", "FALSE" }; + static final Set<String> FALSE_BIT_SET = new HashSet<String>(Arrays.asList(FALSE_BIT_VALUES)); + private Schema schema; public CSVIntermediateDataFormat() { @@ -138,6 +145,8 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { for (Column col : columns) { if (isColumnStringType(col)) { stringTypeColumnIndices.add(i); + } else if (col.getType() == ColumnType.BIT) { + bitTypeColumnIndices.add(i); } else if (col.getType() == ColumnType.DATE) { dateTypeColumnIndices.add(i); } else if (col.getType() == ColumnType.TIME) { @@ -288,8 +297,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { returnValue = DateTime.parse(dateTime); break; case BIT: - returnValue = Boolean.valueOf(fieldString.equals("1") - || fieldString.toLowerCase().equals("true")); + if ((TRUE_BIT_SET.contains(fieldString)) || (FALSE_BIT_SET.contains(fieldString))) { + returnValue = TRUE_BIT_SET.contains(fieldString); + } else { + // throw an exception for any unsupported value for BITs + throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + fieldString); + } break; case ARRAY: case SET: @@ -425,36 +438,44 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { /** * Sanitize every element of the CSV string based on the column type * - * @param stringArray + * @param objectArray */ @SuppressWarnings("unchecked") - private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray) { + private void encodeCSVStringElements(Object[] objectArray, Column[] columnArray) { + for (int i : bitTypeColumnIndices) { + String bitStringValue = objectArray[i].toString(); + if ((TRUE_BIT_SET.contains(bitStringValue)) || (FALSE_BIT_SET.contains(bitStringValue))) { + objectArray[i] = bitStringValue; + } else { + throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + objectArray[i]); + } + } for (int i : stringTypeColumnIndices) { - stringArray[i] = escapeString((String) stringArray[i]); + objectArray[i] = escapeString((String) objectArray[i]); } for (int i : dateTimeTypeColumnIndices) { - if (stringArray[i] instanceof org.joda.time.DateTime) { - stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) stringArray[i])); - } else if (stringArray[i] instanceof org.joda.time.LocalDateTime) { - stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) stringArray[i])); + if (objectArray[i] instanceof org.joda.time.DateTime) { + objectArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) objectArray[i])); + } else if (objectArray[i] instanceof org.joda.time.LocalDateTime) { + objectArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) objectArray[i])); } } for (int i : dateTypeColumnIndices) { - org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i]; - stringArray[i] = encloseWithQuote(df.print(date)); + org.joda.time.LocalDate date = (org.joda.time.LocalDate) objectArray[i]; + objectArray[i] = encloseWithQuote(df.print(date)); } for (int i : timeColumnIndices) { - org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i]; - stringArray[i] = encloseWithQuote(tf.print(date)); + org.joda.time.LocalTime date = (org.joda.time.LocalTime) objectArray[i]; + objectArray[i] = encloseWithQuote(tf.print(date)); } for (int i : byteTypeColumnIndices) { - stringArray[i] = escapeByteArrays((byte[]) stringArray[i]); + objectArray[i] = escapeByteArrays((byte[]) objectArray[i]); } for (int i : listTypeColumnIndices) { - stringArray[i] = encodeList((Object[]) stringArray[i], columnArray[i]); + objectArray[i] = encodeList((Object[]) objectArray[i], columnArray[i]); } for (int i : mapTypeColumnIndices) { - stringArray[i] = encodeMap((Map<Object, Object>) stringArray[i], columnArray[i]); + objectArray[i] = encodeMap((Map<Object, Object>) objectArray[i], columnArray[i]); } } @@ -571,4 +592,4 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { public String toString() { return data; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/c865aefe/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java index 665418d..4b0dd88 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java @@ -36,13 +36,18 @@ public enum IntermediateDataFormatError implements ErrorCode { /** Column type isn't known by Intermediate Data Format. */ INTERMEDIATE_DATA_FORMAT_0004("Unknown column type."), - /** Number of fields. */ - INTERMEDIATE_DATA_FORMAT_0005("Wrong number of fields."), + /** Number of columns in schema does not match the data set. */ + INTERMEDIATE_DATA_FORMAT_0005("Wrong number of columns."), + /** Schema is missing in the IDF. */ INTERMEDIATE_DATA_FORMAT_0006("Schema missing."), + /** For arrays and maps we use JSON representation and incorrect representation results in parse exception*/ INTERMEDIATE_DATA_FORMAT_0008("JSON parse internal error."), + /** Unsupported bit values */ + INTERMEDIATE_DATA_FORMAT_0009("Unsupported bit value."), + ; private final String message; http://git-wip-us.apache.org/repos/asf/sqoop/blob/c865aefe/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java index b348ed8..8a032ef 100644 --- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java @@ -442,28 +442,118 @@ public class TestCSVIntermediateDataFormat { } } + // **************test cases for BIT******************* + + @Test + public void testBitTrueFalseWithCSVTextInAndCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")); + dataFormat.setSchema(schema); + + for (String trueBit : new String[] { "true", "TRUE" }) { + dataFormat.setTextData(trueBit); + assertTrue(Boolean.valueOf(dataFormat.getTextData())); + } + + for (String falseBit : new String[] { "false", "FALSE" }) { + dataFormat.setTextData(falseBit); + assertFalse(Boolean.valueOf(dataFormat.getTextData())); + } + } + + @Test + public void testBitWithCSVTextInAndCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")); + dataFormat.setSchema(schema); + dataFormat.setTextData("1"); + assertEquals("1", dataFormat.getTextData()); + dataFormat.setTextData("0"); + assertEquals("0", dataFormat.getTextData()); + } + + @Test + public void testBitWithObjectArrayInAndCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")).addColumn(new Bit("2")); + dataFormat.setSchema(schema); + Object[] data = new Object[2]; + data[0] = Boolean.TRUE; + data[1] = Boolean.FALSE; + dataFormat.setObjectData(data); + assertEquals("true,false", dataFormat.getTextData()); + } + + @Test(expected = SqoopException.class) + public void testUnsupportedBitWithObjectArrayInAndCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")).addColumn(new Bit("2")); + dataFormat.setSchema(schema); + Object[] data = new Object[2]; + data[0] = "1"; + data[1] = "2"; + dataFormat.setObjectData(data); + assertEquals("1,2", dataFormat.getTextData()); + } + @Test - public void testBit() { + public void testBitWithObjectArrayInAndObjectOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")).addColumn(new Bit("2")); + dataFormat.setSchema(schema); + Object[] data = new Object[2]; + data[0] = Boolean.TRUE; + data[1] = Boolean.FALSE; + dataFormat.setObjectData(data); + assertEquals(true, dataFormat.getObjectData()[0]); + assertEquals(false, dataFormat.getObjectData()[1]); + data[0] = "1"; + data[1] = "0"; + dataFormat.setObjectData(data); + assertEquals(true, dataFormat.getObjectData()[0]); + assertEquals(false, dataFormat.getObjectData()[1]); + } + + public void testBitWithCSVTextInAndObjectArrayOut() { Schema schema = new Schema("test"); schema.addColumn(new Bit("1")); dataFormat.setSchema(schema); - for (String trueBit : new String[]{ - "true", "TRUE", "1" - }) { + for (String trueBit : new String[] { "true", "TRUE", "1" }) { dataFormat.setTextData(trueBit); assertTrue((Boolean) dataFormat.getObjectData()[0]); } - for (String falseBit : new String[]{ - "false", "FALSE", "0" - }) { + for (String falseBit : new String[] { "false", "FALSE", "0" }) { dataFormat.setTextData(falseBit); assertFalse((Boolean) dataFormat.getObjectData()[0]); } } - //**************test cases for arrays******************* + @Test(expected = SqoopException.class) + public void testUnsupportedBitWithObjectArrayInAndObjectOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")).addColumn(new Bit("2")); + dataFormat.setSchema(schema); + Object[] data = new Object[2]; + data[0] = "1"; + data[1] = "2"; + dataFormat.setObjectData(data); + assertEquals(true, dataFormat.getObjectData()[0]); + assertEquals(false, dataFormat.getObjectData()[1]); + } + + @Test(expected = SqoopException.class) + public void testUnsupportedBitWithCSVTextInAndObjectOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Bit("1")).addColumn(new Bit("2")); + dataFormat.setSchema(schema); + dataFormat.setTextData("1,3"); + assertEquals(true, dataFormat.getObjectData()[0]); + assertEquals(false, dataFormat.getObjectData()[1]); + } + + // **************test cases for arrays******************* @Test public void testArrayOfStringWithObjectArrayInObjectArrayOut() { Schema schema = new Schema("test");
