Repository: sqoop Updated Branches: refs/heads/trunk 6e555218f -> 420fc3d53
SQOOP-2055: Run only one map task attempt during export (Jarek Jarcec Cecho via Venkat Ranganathan) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/420fc3d5 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/420fc3d5 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/420fc3d5 Branch: refs/heads/trunk Commit: 420fc3d53f1db62710710b93b9801cff5e4d1b53 Parents: 6e55521 Author: Venkat Ranganathan <[email protected]> Authored: Wed Jan 28 22:45:05 2015 -0800 Committer: Venkat Ranganathan <[email protected]> Committed: Wed Jan 28 22:45:05 2015 -0800 ---------------------------------------------------------------------- .../apache/sqoop/mapreduce/ExportJobBase.java | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/420fc3d5/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java b/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java index cb846e8..f9fa7f3 100644 --- a/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java +++ b/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java @@ -82,6 +82,18 @@ public class ExportJobBase extends JobBase { public static final String EXPORT_MAP_TASKS_KEY = "sqoop.mapreduce.export.map.tasks"; + /** + * Maximal number of attempts for map task during export + * + * Sqoop will default to "1" if this property is not set regardless of what is configured directly + * in your hadoop configuration. 
+   */
+  public static final String SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS =
+    "sqoop.export.mapred.map.max.attempts";
+
+  private static final String HADOOP_MAP_TASK_MAX_ATTEMTPS =
+    "mapred.map.max.attempts";
+
   protected ExportJobContext context;
 
 
@@ -510,4 +522,25 @@ public class ExportJobBase extends JobBase {
    */
   protected void jobTeardown(Job job) throws IOException, ExportException {
   }
+
+  @Override
+  protected void propagateOptionsToJob(Job job) {
+    super.propagateOptionsToJob(job);
+    Configuration conf = job.getConfiguration();
+
+    // This is an export job, where re-trying a failed mapper mostly doesn't make sense.
+    // By default we force MR to run only one attempt per mapper. A user or connector
+    // developer can override this by setting SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS:
+    //
+    // * Positive number - we will allow the specified number of attempts (default: 1)
+    // * Zero or negative - we leave Hadoop's own configured number of attempts alone
+    //
+    // This matters for most connectors because they commit data directly to the
+    // final table, so re-running a mapper leads to misleading errors about
+    // inserting duplicate rows.
+    int sqoopMaxAttempts = conf.getInt(SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS, 1);
+    if (sqoopMaxAttempts > 0) {
+      conf.setInt(HADOOP_MAP_TASK_MAX_ATTEMTPS, sqoopMaxAttempts);
+    }
+  }
 }
