Repository: sqoop Updated Branches: refs/heads/sqoop2 49d6e2687 -> ae31a0237
SQOOP-1815: Sqoop2: Date and DateTime is not encoded in Single Quotes Also includes unit tests. (Veena Basavaraj via Abraham Elmahrek) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/ae31a023 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/ae31a023 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/ae31a023 Branch: refs/heads/sqoop2 Commit: ae31a02372278cbed6c435dc042cdbc5d20f2090 Parents: 49d6e26 Author: Abraham Elmahrek <[email protected]> Authored: Wed Dec 3 23:47:24 2014 -0800 Committer: Abraham Elmahrek <[email protected]> Committed: Wed Dec 3 23:48:34 2014 -0800 ---------------------------------------------------------------------- .../idf/CSVIntermediateDataFormat.java | 33 ++++-- .../idf/TestCSVIntermediateDataFormat.java | 104 ++++++++++++++++--- .../org/apache/sqoop/driver/JobManager.java | 5 + .../connector/jdbc/generic/PartitionerTest.java | 40 +++---- 4 files changed, 139 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java index bd0fbf0..a075d3f 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java @@ -27,8 +27,10 @@ import org.apache.sqoop.schema.type.Column; import org.apache.sqoop.schema.type.ColumnType; import org.apache.sqoop.schema.type.FixedPoint; import org.apache.sqoop.schema.type.FloatingPoint; +import org.joda.time.DateTime; import org.joda.time.LocalDate; -import org.joda.time.LocalDateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; import org.json.simple.JSONArray; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; @@ -47,7 +49,6 @@ import java.util.Map; import java.util.Set; import java.util.regex.Matcher; - /** * A concrete implementation for the {@link #IntermediateDataFormat} that * represents each row of the data source as a comma separates list. Each @@ -85,11 +86,16 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { // ISO-8859-1 is an 8-bit codec that is supported in every java // implementation. static final String BYTE_FIELD_CHARSET = "ISO-8859-1"; + //http://www.joda.org/joda-time/key_format.html provides details on the formatter token + static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'"); + static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd"); private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> listTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>(); + private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>(); + private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>(); private Schema schema; @@ -128,8 +134,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { List<Column> columns = schema.getColumns(); int i = 0; for (Column col : columns) { - if (isColumnStringType(col) ) { + if (isColumnStringType(col)) { stringTypeColumnIndices.add(i); + } else if (col.getType() == ColumnType.DATE) { + dateTypeColumnIndices.add(i); + } else if (col.getType() == ColumnType.DATE_TIME) { + dateTimeTypeColumnIndices.add(i); } else if (col.getType() == ColumnType.BINARY) { byteTypeColumnIndices.add(i); } else if (isColumnListType(col)) { @@ -261,14 +271,14 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { returnValue = new BigDecimal(fieldString); break; case DATE: - returnValue = LocalDate.parse(fieldString); + returnValue = LocalDate.parse(removeQuotes(fieldString)); break; case DATE_TIME: // A datetime string with a space as date-time separator will not be // parsed expectedly. The expected separator is "T". See also: // https://github.com/JodaOrg/joda-time/issues/11 - String iso8601 = fieldString.replace(" ", "T"); - returnValue = LocalDateTime.parse(iso8601); + String dateTime = removeQuotes(fieldString).replace(" ", "T"); + returnValue = DateTime.parse(dateTime); break; case BIT: returnValue = Boolean.valueOf(fieldString.equals("1") @@ -415,6 +425,17 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> { for (int i : stringTypeColumnIndices) { stringArray[i] = escapeString((String) stringArray[i]); } + for (int i : dateTimeTypeColumnIndices) { + if (stringArray[i] instanceof org.joda.time.DateTime) { + stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) stringArray[i])); + } else if (stringArray[i] instanceof org.joda.time.LocalDateTime) { + stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) stringArray[i])); + } + } + for (int i : dateTypeColumnIndices) { + org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i]; + stringArray[i] = encloseWithQuote(df.print(date)); + } for (int i : byteTypeColumnIndices) { stringArray[i] = escapeByteArrays((byte[]) stringArray[i]); } http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java ---------------------------------------------------------------------- diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java index 75fe429..bf15c69 100644 --- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java @@ -272,38 +272,111 @@ public class TestCSVIntermediateDataFormat { assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData())); } - //**************test cases for date/datetime******************* + // **************test cases for date******************* @Test - public void testDate() { + public void testDateWithCSVTextInCSVTextOut() { Schema schema = new Schema("test"); schema.addColumn(new Date("1")); dataFormat.setSchema(schema); + dataFormat.setTextData("'2014-10-01'"); + assertEquals("'2014-10-01'", dataFormat.getTextData()); + } - dataFormat.setTextData("2014-10-01"); - assertEquals("2014-10-01", dataFormat.getObjectData()[0].toString()); + @Test + public void testDateWithCSVTextInObjectArrayOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Date("1")); + dataFormat.setSchema(schema); + dataFormat.setTextData("'2014-10-01'"); + org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01); + assertEquals(date.toString(), dataFormat.getObjectData()[0].toString()); } @Test - public void testDateTime() { + public void testDateWithObjectArrayInCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Date("1")).addColumn(new Text("2")); + dataFormat.setSchema(schema); + org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01); + Object[] in = { date, "test" }; + dataFormat.setObjectData(in); + assertEquals("'2014-10-01','test'", dataFormat.getTextData()); + } + + @Test + public void testDateWithObjectArrayInObjectArrayOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Date("1")); + dataFormat.setSchema(schema); + org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01); + Object[] in = { date }; + dataFormat.setObjectData(in); + assertEquals(date.toString(), dataFormat.getObjectData()[0].toString()); + } + + // **************test cases for dateTime******************* + + @Test + public void testDateTimeWithCSVTextInCSVTextOut() { Schema schema = new Schema("test"); schema.addColumn(new DateTime("1")); dataFormat.setSchema(schema); - for (String dateTime : new String[]{ - "2014-10-01T12:00:00", - "2014-10-01T12:00:00.000" - }) { + dataFormat.setTextData("'2014-10-01 12:00:00'"); + assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData()); + } + + @Test + public void testDateTimeWithCSVTextInObjectArrayOut() { + Schema schema = new Schema("test"); + schema.addColumn(new DateTime("1")); + dataFormat.setSchema(schema); + + dataFormat.setTextData("'2014-10-01 12:00:00'"); + assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString()); + } + + @Test + public void testDateTimeWithObjectInCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new DateTime("1")); + dataFormat.setSchema(schema); + org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0); + Object[] in = { dateTime }; + dataFormat.setObjectData(in); + assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData()); + } + + @Test + public void testLocalDateTimeWithObjectInCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new DateTime("1")); + dataFormat.setSchema(schema); + org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, + 0); + Object[] in = { dateTime }; + dataFormat.setObjectData(in); + assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData()); + } + + @Test + public void testDateTimePrecisionWithCSVTextInObjectArrayOut() { + Schema schema = new Schema("test"); + schema.addColumn(new DateTime("1")); + dataFormat.setSchema(schema); + + for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) { dataFormat.setTextData(dateTime); - assertEquals("2014-10-01T12:00:00.000", dataFormat.getObjectData()[0].toString()); + assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString()); } } /** * In ISO8601 "T" is used as date-time separator. Unfortunately in the real - * world, database (confirmed with mysql and postgres) might return a datatime + * world, database (confirmed with mysql and postgres) might return a datetime * string with a space as separator. The test case intends to check, whether - * such datatime string can be handled expectedly. + * such datetime string can be handled expectedly. */ @Test public void testDateTimeISO8601Alternative() { @@ -311,12 +384,9 @@ public class TestCSVIntermediateDataFormat { schema.addColumn(new DateTime("1")); dataFormat.setSchema(schema); - for (String dateTime : new String[]{ - "2014-10-01 12:00:00", - "2014-10-01 12:00:00.000" - }) { + for (String dateTime : new String[] { "'2014-10-01 12:00:00'", "'2014-10-01 12:00:00.000'" }) { dataFormat.setTextData(dateTime); - assertEquals("2014-10-01T12:00:00.000", dataFormat.getObjectData()[0].toString()); + assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString()); } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/core/src/main/java/org/apache/sqoop/driver/JobManager.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/sqoop/driver/JobManager.java b/core/src/main/java/org/apache/sqoop/driver/JobManager.java index d6efa6d..f4f5561 100644 --- a/core/src/main/java/org/apache/sqoop/driver/JobManager.java +++ b/core/src/main/java/org/apache/sqoop/driver/JobManager.java @@ -47,8 +47,10 @@ import org.apache.sqoop.schema.Schema; import org.apache.sqoop.submission.SubmissionStatus; import org.apache.sqoop.submission.counter.Counters; import org.apache.sqoop.utils.ClassUtils; +import org.joda.time.DateTime; import org.joda.time.LocalDate; import org.joda.time.LocalDateTime; +import org.joda.time.LocalTime; import org.json.simple.JSONValue; public class JobManager implements Reconfigurable { @@ -397,8 +399,11 @@ public class JobManager implements Reconfigurable { jobRequest.addJarForClass(executionEngine.getClass()); // Extra libraries that Sqoop code requires jobRequest.addJarForClass(JSONValue.class); + // Add JODA classes for IDF date/time handling jobRequest.addJarForClass(LocalDate.class); jobRequest.addJarForClass(LocalDateTime.class); + jobRequest.addJarForClass(DateTime.class); + jobRequest.addJarForClass(LocalTime.class); } MSubmission createJobSubmission(HttpEventContext ctx, long jobId) { http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java ---------------------------------------------------------------------- diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java index 824a51d..bd34911 100644 --- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java +++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java @@ -103,26 +103,26 @@ public class PartitionerTest extends ConnectorTestCase { // Assert correct output assertToFiles((extractors > maxOutputFiles) ? maxOutputFiles : extractors); assertTo( - "1,'Warty Warthog',4.10,2004-10-20,false", - "2,'Hoary Hedgehog',5.04,2005-04-08,false", - "3,'Breezy Badger',5.10,2005-10-13,false", - "4,'Dapper Drake',6.06,2006-06-01,true", - "5,'Edgy Eft',6.10,2006-10-26,false", - "6,'Feisty Fawn',7.04,2007-04-19,false", - "7,'Gutsy Gibbon',7.10,2007-10-18,false", - "8,'Hardy Heron',8.04,2008-04-24,true", - "9,'Intrepid Ibex',8.10,2008-10-18,false", - "10,'Jaunty Jackalope',9.04,2009-04-23,false", - "11,'Karmic Koala',9.10,2009-10-29,false", - "12,'Lucid Lynx',10.04,2010-04-29,true", - "13,'Maverick Meerkat',10.10,2010-10-10,false", - "14,'Natty Narwhal',11.04,2011-04-28,false", - "15,'Oneiric Ocelot',11.10,2011-10-10,false", - "16,'Precise Pangolin',12.04,2012-04-26,true", - "17,'Quantal Quetzal',12.10,2012-10-18,false", - "18,'Raring Ringtail',13.04,2013-04-25,false", - "19,'Saucy Salamander',13.10,2013-10-17,false" - ); + "1,'Warty Warthog',4.10,'2004-10-20',false", + "2,'Hoary Hedgehog',5.04,'2005-04-08',false", + "3,'Breezy Badger',5.10,'2005-10-13',false", + "4,'Dapper Drake',6.06,'2006-06-01',true", + "5,'Edgy Eft',6.10,'2006-10-26',false", + "6,'Feisty Fawn',7.04,'2007-04-19',false", + "7,'Gutsy Gibbon',7.10,'2007-10-18',false", + "8,'Hardy Heron',8.04,'2008-04-24',true", + "9,'Intrepid Ibex',8.10,'2008-10-18',false", + "10,'Jaunty Jackalope',9.04,'2009-04-23',false", + "11,'Karmic Koala',9.10,'2009-10-29',false", + "12,'Lucid Lynx',10.04,'2010-04-29',true", + "13,'Maverick Meerkat',10.10,'2010-10-10',false", + "14,'Natty Narwhal',11.04,'2011-04-28',false", + "15,'Oneiric Ocelot',11.10,'2011-10-10',false", + "16,'Precise Pangolin',12.04,'2012-04-26',true", + "17,'Quantal Quetzal',12.10,'2012-10-18',false", + "18,'Raring Ringtail',13.04,'2013-04-25',false", + "19,'Saucy Salamander',13.10,'2013-10-17',false" + ); // Clean up testing table dropTable();
