Updated Branches: refs/heads/trunk 7c830c818 -> 75bc21b16
SQOOP-885: Allow excluding some tables from import-all-tables tools (Abraham Elmahrek via Jarek Jarcec Cecho) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/75bc21b1 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/75bc21b1 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/75bc21b1 Branch: refs/heads/trunk Commit: 75bc21b16435457844c8a2dec8410abf5156f738 Parents: 7c830c8 Author: Jarek Jarcec Cecho <[email protected]> Authored: Wed Mar 27 17:59:57 2013 -0700 Committer: Jarek Jarcec Cecho <[email protected]> Committed: Wed Mar 27 17:59:57 2013 -0700 ---------------------------------------------------------------------- src/docs/man/sqoop-import-all-tables.txt | 3 + src/docs/user/import-all-tables.txt | 3 + src/java/org/apache/sqoop/SqoopOptions.java | 18 +++++ src/java/org/apache/sqoop/tool/BaseSqoopTool.java | 3 + .../org/apache/sqoop/tool/ImportAllTablesTool.java | 44 +++++++++++- src/test/com/cloudera/sqoop/TestAllTables.java | 57 ++++++++++++++- 6 files changed, 125 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/75bc21b1/src/docs/man/sqoop-import-all-tables.txt ---------------------------------------------------------------------- diff --git a/src/docs/man/sqoop-import-all-tables.txt b/src/docs/man/sqoop-import-all-tables.txt index 5d340b9..6b639f5 100644 --- a/src/docs/man/sqoop-import-all-tables.txt +++ b/src/docs/man/sqoop-import-all-tables.txt @@ -60,6 +60,9 @@ Import control options --compression-codec (codec):: Uses the Hadoop +codec+ class to compress data as it is written to HDFS. +--exclude-tables (tables):: + Comma separated list of tables to exclude from import process. 
+ include::output-args.txt[] http://git-wip-us.apache.org/repos/asf/sqoop/blob/75bc21b1/src/docs/user/import-all-tables.txt ---------------------------------------------------------------------- diff --git a/src/docs/user/import-all-tables.txt b/src/docs/user/import-all-tables.txt index 9f9bc88..8c3a4f5 100644 --- a/src/docs/user/import-all-tables.txt +++ b/src/docs/user/import-all-tables.txt @@ -56,11 +56,14 @@ Argument Description +\--warehouse-dir <dir>+ HDFS parent for table destination +-z,\--compress+ Enable compression +\--compression-codec <c>+ Use Hadoop codec (default gzip) ++\--exclude-tables <tables>+ Comma separated list of tables to exclude\ + from import process --------------------------------------------------------------------- These arguments behave in the same manner as they do when used for the +sqoop-import+ tool, but the +\--table+, +\--split-by+, +\--columns+, and +\--where+ arguments are invalid for +sqoop-import-all-tables+. +The +\--exclude-tables+ argument is for +sqoop-import-all-tables+ only. include::output-args.txt[] http://git-wip-us.apache.org/repos/asf/sqoop/blob/75bc21b1/src/java/org/apache/sqoop/SqoopOptions.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java index 08bab1e..906542b 100644 --- a/src/java/org/apache/sqoop/SqoopOptions.java +++ b/src/java/org/apache/sqoop/SqoopOptions.java @@ -225,6 +225,10 @@ public class SqoopOptions implements Cloneable { @StoredAsProperty("incremental.last.value") private String incrementalLastValue; + // exclude these tables when importing all tables. + @StoredAsProperty("import.all_tables.exclude") + private String allTablesExclude; + // HDFS paths for "old" and "new" datasets in merge tool. 
@StoredAsProperty("merge.old.path") private String mergeOldPath; @StoredAsProperty("merge.new.path") private String mergeNewPath; @@ -1873,6 +1877,20 @@ public class SqoopOptions implements Cloneable { } /** + * Set the tables to be excluded when doing all table import. + */ + public void setAllTablesExclude(String exclude) { + this.allTablesExclude = exclude; + } + + /** + * Get the tables to be excluded when doing all table import. + */ + public String getAllTablesExclude() { + return this.allTablesExclude; + } + + /** * Set the name of the saved job this SqoopOptions belongs to. */ public void setJobName(String job) { http://git-wip-us.apache.org/repos/asf/sqoop/blob/75bc21b1/src/java/org/apache/sqoop/tool/BaseSqoopTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java index 9874bae..c457272 100644 --- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java +++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java @@ -159,6 +159,9 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { public static final String INCREMENT_COL_ARG = "check-column"; public static final String INCREMENT_LAST_VAL_ARG = "last-value"; + // Arguments for all table imports. + public static final String ALL_TABLE_EXCLUDES_ARG = "exclude-tables"; + // HBase arguments. 
public static final String HBASE_TABLE_ARG = "hbase-table"; public static final String HBASE_COL_FAM_ARG = "column-family"; http://git-wip-us.apache.org/repos/asf/sqoop/blob/75bc21b1/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java b/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java index 158a3f1..ab95959 100644 --- a/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java +++ b/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java @@ -19,12 +19,19 @@ package org.apache.sqoop.tool; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.cloudera.sqoop.Sqoop; import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; import com.cloudera.sqoop.hive.HiveImport; import com.cloudera.sqoop.util.ImportException; @@ -41,9 +48,36 @@ public class ImportAllTablesTool extends com.cloudera.sqoop.tool.ImportTool { } @Override + @SuppressWarnings("static-access") + /** {@inheritDoc} */ + protected RelatedOptions getImportOptions() { + // Imports + RelatedOptions importOpts = super.getImportOptions(); + + importOpts.addOption(OptionBuilder.withArgName("tables") + .hasArg().withDescription("Tables to exclude when importing all tables") + .withLongOpt(ALL_TABLE_EXCLUDES_ARG) + .create()); + + return importOpts; + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + super.applyOptions(in, out); + + if (in.hasOption(ALL_TABLE_EXCLUDES_ARG)) { + out.setAllTablesExclude(in.getOptionValue(ALL_TABLE_EXCLUDES_ARG)); + } + } + + 
@Override /** {@inheritDoc} */ public int run(SqoopOptions options) { HiveImport hiveImport = null; + Set<String> excludes = new HashSet<String>(); if (!init(options)) { return 1; @@ -54,6 +88,10 @@ public class ImportAllTablesTool extends com.cloudera.sqoop.tool.ImportTool { hiveImport = new HiveImport(options, manager, options.getConf(), false); } + if (options.getAllTablesExclude() != null) { + excludes.addAll(Arrays.asList(options.getAllTablesExclude().split(","))); + } + String [] tables = manager.listTables(); if (null == tables) { System.err.println("Could not retrieve tables list from server"); @@ -61,7 +99,11 @@ public class ImportAllTablesTool extends com.cloudera.sqoop.tool.ImportTool { return 1; } else { for (String tableName : tables) { - importTable(options, tableName, hiveImport); + if (excludes.contains(tableName)) { + System.out.println("Skipping table: " + tableName); + } else { + importTable(options, tableName, hiveImport); + } } } } catch (IOException ioe) { http://git-wip-us.apache.org/repos/asf/sqoop/blob/75bc21b1/src/test/com/cloudera/sqoop/TestAllTables.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/TestAllTables.java b/src/test/com/cloudera/sqoop/TestAllTables.java index 133bc8f..d2c7b16 100644 --- a/src/test/com/cloudera/sqoop/TestAllTables.java +++ b/src/test/com/cloudera/sqoop/TestAllTables.java @@ -23,6 +23,7 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.List; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -42,7 +43,7 @@ public class TestAllTables extends ImportJobTestCase { * Create the argv to pass to Sqoop. * @return the argv as an array of strings. 
*/ - private String [] getArgv(boolean includeHadoopFlags) { + private String [] getArgv(boolean includeHadoopFlags, String[] excludeTables) { ArrayList<String> args = new ArrayList<String>(); if (includeHadoopFlags) { @@ -57,6 +58,10 @@ public class TestAllTables extends ImportJobTestCase { args.add("1"); args.add("--escaped-by"); args.add("\\"); + if (excludeTables != null) { + args.add("--exclude-tables"); + args.add(StringUtils.join(excludeTables, ",")); + } return args.toArray(new String[0]); } @@ -106,7 +111,7 @@ public class TestAllTables extends ImportJobTestCase { } public void testMultiTableImport() throws IOException { - String [] argv = getArgv(true); + String [] argv = getArgv(true, null); runImport(new ImportAllTablesTool(), argv); Path warehousePath = new Path(this.getWarehouseDir()); @@ -140,4 +145,52 @@ public class TestAllTables extends ImportJobTestCase { } } } + + public void testMultiTableImportWithExclude() throws IOException { + String exclude = this.tableNames.get(0); + String [] argv = getArgv(true, new String[]{ exclude }); + runImport(new ImportAllTablesTool(), argv); + + Path warehousePath = new Path(this.getWarehouseDir()); + int i = 0; + for (String tableName : this.tableNames) { + Path tablePath = new Path(warehousePath, tableName); + Path filePath = new Path(tablePath, "part-m-00000"); + + // dequeue the expected value for this table. This + // list has the same order as the tableNames list. 
+ String expectedVal = Integer.toString(i++) + "," + + this.expectedStrings.get(0); + this.expectedStrings.remove(0); + + BufferedReader reader = null; + if (!isOnPhysicalCluster()) { + reader = new BufferedReader( + new InputStreamReader(new FileInputStream( + new File(filePath.toString())))); + } else { + FSDataInputStream dis; + FileSystem dfs = FileSystem.get(getConf()); + if (tableName.equals(exclude)) { + try { + dis = dfs.open(filePath); + assertFalse(true); + } catch (FileNotFoundException e) { + // Success + continue; + } + } else { + dis = dfs.open(filePath); + } + reader = new BufferedReader(new InputStreamReader(dis)); + } + try { + String line = reader.readLine(); + assertEquals("Table " + tableName + " expected a different string", + expectedVal, line); + } finally { + IOUtils.closeStream(reader); + } + } + } }
