Repository: sqoop Updated Branches: refs/heads/sqoop2 87855a376 -> 4a5bd295d
SQOOP-2382: Sqoop2: Arithmetic exception in partitioner when allow null is true (Banmeet Singh via Abraham Elmahrek) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/4a5bd295 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/4a5bd295 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/4a5bd295 Branch: refs/heads/sqoop2 Commit: 4a5bd295d828eaa37b3401872dff83addf36193f Parents: 87855a3 Author: Abraham Elmahrek <[email protected]> Authored: Tue Jun 23 13:47:03 2015 -0700 Committer: Abraham Elmahrek <[email protected]> Committed: Tue Jun 23 13:47:33 2015 -0700 ---------------------------------------------------------------------- .../sqoop/job/etl/PartitionerContext.java | 28 +++++++++++ .../connector/jdbc/GenericJdbcPartitioner.java | 7 ++- docs/src/site/sphinx/Sqoop5MinutesDemo.rst | 4 +- .../apache/sqoop/job/mr/SqoopInputFormat.java | 4 +- .../jdbc/generic/FromRDBMSToHDFSTest.java | 50 ++++++++++++++++++++ 5 files changed, 90 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java ---------------------------------------------------------------------- diff --git a/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java b/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java index 67fffd6..bb52bb2 100644 --- a/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java +++ b/common/src/main/java/org/apache/sqoop/job/etl/PartitionerContext.java @@ -35,6 +35,8 @@ public class PartitionerContext extends TransferableContext { private Schema schema; + private boolean skipMaxPartitionCheck = false; + public PartitionerContext(ImmutableContext context, long maxPartitions, Schema schema) { super(context); this.maxPartitions = maxPartitions; @@ -54,6 +56,32 @@ public class 
PartitionerContext extends TransferableContext { } /** + * Set flag indicating whether to skip check that number of splits + * < max extractors specified by user. + * + * Needed in case user specifies number of extractors as 1 as well as + * allows null values in partitioning column + * + * @param skipMaxPartitionCheck true to skip the max-partition check + */ + public void setSkipMaxPartitionCheck(boolean skipMaxPartitionCheck) { + this.skipMaxPartitionCheck = skipMaxPartitionCheck; + } + + /** + * Return flag indicating whether to skip the check that number of splits + * < max extractors specified by user. + * + * Needed in case user specifies number of extractors as 1 as well as + * allows null values in partitioning column + * + * @return true if the max-partition check should be skipped + */ + public boolean getSkipMaxPartitionCheck() { + return this.skipMaxPartitionCheck; + } + + /** * Return schema associated with this step. * * @return http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java ---------------------------------------------------------------------- diff --git a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java index 23b57c0..a99b3a9 100644 --- a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java +++ b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcPartitioner.java @@ -74,7 +74,12 @@ public class GenericJdbcPartitioner extends Partitioner<LinkConfiguration, FromJ GenericJdbcPartition partition = new GenericJdbcPartition(); partition.setConditions(partitionColumnName + " IS NULL"); partitions.add(partition); - numberPartitions -= 1; + if (numberPartitions > 1) { + numberPartitions -= 1; + } + else { + context.setSkipMaxPartitionCheck(true); + } } switch (partitionColumnType) { 
http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/docs/src/site/sphinx/Sqoop5MinutesDemo.rst ---------------------------------------------------------------------- diff --git a/docs/src/site/sphinx/Sqoop5MinutesDemo.rst b/docs/src/site/sphinx/Sqoop5MinutesDemo.rst index fc9a43b..19115a2 100644 --- a/docs/src/site/sphinx/Sqoop5MinutesDemo.rst +++ b/docs/src/site/sphinx/Sqoop5MinutesDemo.rst @@ -184,7 +184,9 @@ Next, we can use the two link Ids to associate the ``From`` and ``To`` for the j Loaders:(Optional) 2 New job was successfully created with validation status OK and persistent id 1 -Our new job object was created with assigned id 1. +Our new job object was created with assigned id 1. Note that if null value is allowed for the partition column, +at least 2 extractors are needed for Sqoop to carry out the data transfer. On specifying 1 extractor in this +scenario, Sqoop shall ignore this setting and continue with 2 extractors. Start Job ( a.k.a Data transfer ) ================================= http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java ---------------------------------------------------------------------- diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java index 732ee0a..5aef878 100644 --- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java +++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/mr/SqoopInputFormat.java @@ -80,7 +80,9 @@ public class SqoopInputFormat extends InputFormat<SqoopSplit, NullWritable> { splits.add(split); } - if(splits.size() > maxPartitions) { + //SQOOP-2382: Need to skip this check in case extractors is set to 1 + // and null values are allowed in partitioning column + if(splits.size() > maxPartitions && (false == partitionerContext.getSkipMaxPartitionCheck())) { throw new 
SqoopException(MRExecutionError.MAPRED_EXEC_0025, String.format("Got %d, max was %d", splits.size(), maxPartitions)); } http://git-wip-us.apache.org/repos/asf/sqoop/blob/4a5bd295/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java ---------------------------------------------------------------------- diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java index 52d62a6..1272ed9 100644 --- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java +++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java @@ -270,4 +270,54 @@ public class FromRDBMSToHDFSTest extends ConnectorTestCase { // Clean up testing table dropTable(); } + + @Test + public void testAllowNullsWithOneExtractor() throws Exception { + //Integration test case for SQOOP-2382 + //Server must not throw an exception when null values are allowed in the + //partitioning column and number of extractors is set to only 1 + + createAndLoadTableCities(); + + // RDBMS link + MLink rdbmsConnection = getClient().createLink("generic-jdbc-connector"); + fillRdbmsLinkConfig(rdbmsConnection); + saveLink(rdbmsConnection); + + // HDFS link + MLink hdfsConnection = getClient().createLink("hdfs-connector"); + fillHdfsLink(hdfsConnection); + saveLink(hdfsConnection); + + // Job creation + MJob job = getClient().createJob(rdbmsConnection.getPersistenceId(), hdfsConnection.getPersistenceId()); + + // Set rdbms "FROM" config + fillRdbmsFromConfig(job, "id"); + + MConfigList configs = job.getFromJobConfig(); + configs.getBooleanInput("fromJobConfig.allowNullValueInPartitionColumn").setValue(true); + + // fill the hdfs "TO" config + fillHdfsToConfig(job, ToFormat.TEXT_FILE); + // driver config + MDriverConfig driverConfig = job.getDriverConfig(); + 
driverConfig.getIntegerInput("throttlingConfig.numExtractors").setValue(1); + + saveJob(job); + + executeJob(job); + + // Assert correct output + assertTo( + "1,'USA','2004-10-23','San Francisco'", + "2,'USA','2004-10-24','Sunnyvale'", + "3,'Czech Republic','2004-10-25','Brno'", + "4,'USA','2004-10-26','Palo Alto'" + ); + + // Clean up testing table + dropTable(); + } + }
