Updated Branches: refs/heads/trunk ddb81e185 -> 840711812
SQOOP-1192: Add option "--skip-dist-cache" to allow Sqoop not copying jars in %SQOOP_HOME%\lib folder when launched by Oozie and use Oozie share lib (Shuaishuai Nie via Jarek Jarcec Cecho) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/84071181 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/84071181 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/84071181 Branch: refs/heads/trunk Commit: 84071181265f98959ffdfc41425022f8251d2429 Parents: ddb81e1 Author: Jarek Jarcec Cecho <[email protected]> Authored: Mon Oct 14 15:31:11 2013 -0700 Committer: Jarek Jarcec Cecho <[email protected]> Committed: Mon Oct 14 15:31:11 2013 -0700 ---------------------------------------------------------------------- src/docs/user/import.txt | 13 +++++++++++++ src/java/org/apache/sqoop/SqoopOptions.java | 9 +++++++++ src/java/org/apache/sqoop/mapreduce/JobBase.java | 4 ++++ .../org/apache/sqoop/mapreduce/TextExportMapper.java | 3 +-- .../sqoop/mapreduce/hcat/SqoopHCatUtilities.java | 4 ++++ src/java/org/apache/sqoop/tool/BaseSqoopTool.java | 10 ++++++++++ src/test/com/cloudera/sqoop/TestSqoopOptions.java | 6 ++++++ 7 files changed, 47 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/docs/user/import.txt ---------------------------------------------------------------------- diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt index 71b50d8..dfc9b39 100644 --- a/src/docs/user/import.txt +++ b/src/docs/user/import.txt @@ -208,6 +208,19 @@ multi-column indices. If your table has no index column, or has a multi-column key, then you must also manually choose a splitting column. +Controlling Distributed Cache +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sqoop will copy the jars in $SQOOP_HOME/lib folder to job cache every +time when start a Sqoop job. 
When launched by Oozie this is unnecessary +since Oozie uses its own Sqoop share lib which keeps Sqoop dependencies +in the distributed cache. Oozie will do the localization on each +worker node for the Sqoop dependencies only once during the first Sqoop +job and reuse the jars on worker node for subsequent jobs. Using +option +--skip-dist-cache+ in Sqoop command when launched by Oozie will +skip the step in which Sqoop copies its dependencies to job cache and save +massive I/O. + Controlling the Import Process ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/SqoopOptions.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java index 836f588..13637b5 100644 --- a/src/java/org/apache/sqoop/SqoopOptions.java +++ b/src/java/org/apache/sqoop/SqoopOptions.java @@ -165,6 +165,7 @@ public class SqoopOptions implements Cloneable { @StoredAsProperty("hcatalog.storage.stanza") private String hCatStorageStanza; private String hCatHome; // not serialized to metastore. 
+ private boolean skipDistCache; // User explicit mapping of types private Properties mapColumnJava; // stored as map.colum.java @@ -2198,4 +2199,12 @@ public class SqoopOptions implements Cloneable { public void setCall(String theCall) { this.call = theCall; } + + public void setSkipDistCache(boolean skip) { + this.skipDistCache = skip; + } + + public boolean isSkipDistCache() { + return this.skipDistCache; + } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/JobBase.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/JobBase.java b/src/java/org/apache/sqoop/mapreduce/JobBase.java index 322df1c..ddef421 100644 --- a/src/java/org/apache/sqoop/mapreduce/JobBase.java +++ b/src/java/org/apache/sqoop/mapreduce/JobBase.java @@ -133,6 +133,10 @@ public class JobBase { */ protected void cacheJars(Job job, ConnManager mgr) throws IOException { + if (options.isSkipDistCache()) { + LOG.info("Not adding sqoop jars to distributed cache as requested"); + return; + } Configuration conf = job.getConfiguration(); FileSystem fs = FileSystem.getLocal(conf); http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java index fb1edfd..8a354b5 100644 --- a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java +++ b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java @@ -109,8 +109,7 @@ public class TextExportMapper LOG.error("due to the batching nature of export."); LOG.error(""); - throw new IOException("Can't export data, please check task tracker logs", - e); + throw new IOException("Can't export data, please check failed map task logs", e); } } } 
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java index b05f587..09652df 100644 --- a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java +++ b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java @@ -730,6 +730,10 @@ public final class SqoopHCatUtilities { LOG.info("Not adding hcatalog jars to distributed cache in local mode"); return; } + if (options.isSkipDistCache()) { + LOG.info("Not adding hcatalog jars to distributed cache as requested"); + return; + } Configuration conf = job.getConfiguration(); String hiveHome = null; String hCatHome = null; http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/tool/BaseSqoopTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java index a1080d3..9230f82 100644 --- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java +++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java @@ -155,6 +155,7 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { public static final String UPDATE_KEY_ARG = "update-key"; public static final String UPDATE_MODE_ARG = "update-mode"; public static final String CALL_ARG = "call"; + public static final String SKIP_DISTCACHE_ARG = "skip-dist-cache"; // Arguments for validation. 
public static final String VALIDATE_ARG = "validate"; @@ -416,6 +417,10 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { .hasArg().withDescription("Override $HADOOP_MAPRED_HOME_ARG") .withLongOpt(HADOOP_HOME_ARG) .create()); + commonOpts.addOption(OptionBuilder + .withDescription("Skip copying jars to distributed cache") + .withLongOpt(SKIP_DISTCACHE_ARG) + .create()); // misc (common) commonOpts.addOption(OptionBuilder @@ -827,6 +832,11 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { out.setDriverClassName(in.getOptionValue(DRIVER_ARG)); } + if (in.hasOption(SKIP_DISTCACHE_ARG)) { + LOG.debug("Disabling dist cache"); + out.setSkipDistCache(true); + } + applyCredentialsOptions(in, out); http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/test/com/cloudera/sqoop/TestSqoopOptions.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/TestSqoopOptions.java b/src/test/com/cloudera/sqoop/TestSqoopOptions.java index 90bc08e..686d398 100644 --- a/src/test/com/cloudera/sqoop/TestSqoopOptions.java +++ b/src/test/com/cloudera/sqoop/TestSqoopOptions.java @@ -280,6 +280,12 @@ public class TestSqoopOptions extends TestCase { assertEquals("String", mapping.get("id")); } + public void testSkipDistCacheOption() throws Exception { + String[] args = {"--skip-dist-cache"}; + SqoopOptions opts = parse(args); + assertTrue(opts.isSkipDistCache()); + } + public void testPropertySerialization1() { // Test that if we write a SqoopOptions out to a Properties, // and then read it back in, we get all the same results.
