Repository: sqoop Updated Branches: refs/heads/trunk 803edc279 -> e280b47ea
SQOOP-3123: Introduce escaping logic for column mapping parameters (same what Sqoop already uses for the DB column names), thus special column names (e.g. containing '#' character) and mappings realted to those columns can be in the same format (thus not confusing the end users), and also eliminates the related AVRO format clashing issues. (Dmitry Zagorulkin via Attila Szabo) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/e280b47e Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/e280b47e Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/e280b47e Branch: refs/heads/trunk Commit: e280b47eacc3428040669df5f91cedccd5be7e46 Parents: 803edc2 Author: Attila Szabo <[email protected]> Authored: Mon Mar 20 17:25:29 2017 +0100 Committer: Attila Szabo <[email protected]> Committed: Mon Mar 20 17:25:29 2017 +0100 ---------------------------------------------------------------------- src/java/org/apache/sqoop/SqoopOptions.java | 47 +++++++++++++++++++- src/java/org/apache/sqoop/orm/ClassWriter.java | 16 +++---- .../org/apache/sqoop/tool/BaseSqoopTool.java | 17 +++++++ src/java/org/apache/sqoop/tool/ImportTool.java | 16 ++++--- src/test/com/cloudera/sqoop/TestAvroImport.java | 43 ++++++++++++++++++ .../com/cloudera/sqoop/TestSqoopOptions.java | 30 +++++++++---- 6 files changed, 145 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/e280b47e/src/java/org/apache/sqoop/SqoopOptions.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java index caf95f6..c3f4604 100644 --- a/src/java/org/apache/sqoop/SqoopOptions.java +++ b/src/java/org/apache/sqoop/SqoopOptions.java @@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.sqoop.accumulo.AccumuloConstants; import org.apache.sqoop.mapreduce.mainframe.MainframeConfiguration; +import org.apache.sqoop.tool.BaseSqoopTool; import org.apache.sqoop.util.CredentialsUtil; import org.apache.sqoop.util.LoggingUtils; import org.apache.sqoop.util.SqoopJsonUtil; @@ -53,6 +54,7 @@ import com.cloudera.sqoop.util.RandomHash; import com.cloudera.sqoop.util.StoredAsProperty; import static org.apache.sqoop.Sqoop.SQOOP_RETHROW_PROPERTY; +import static org.apache.sqoop.orm.ClassWriter.toJavaIdentifier; /** * Configurable state used by Sqoop tools. @@ -78,7 +80,6 @@ public class SqoopOptions implements Cloneable { public static final String DEF_HCAT_HOME_OLD = "/usr/lib/hcatalog"; public static final boolean METASTORE_PASSWORD_DEFAULT = false; - /** * Thrown when invalid cmdline options are given. */ @@ -201,6 +202,9 @@ public class SqoopOptions implements Cloneable { // User explicit mapping of types private Properties mapColumnJava; // stored as map.colum.java private Properties mapColumnHive; // stored as map.column.hive + // SQOOP-3123 default enabled + private boolean escapeColumnMappingEnabled; + private Properties mapReplacedColumnJava; // used to replace special characters in columns // An ordered list of column names denoting what order columns are // serialized to a PreparedStatement from a generated record type. @@ -874,6 +878,10 @@ public class SqoopOptions implements Cloneable { other.mapColumnJava = (Properties) this.mapColumnJava.clone(); } + if (null != mapReplacedColumnJava) { + other.mapReplacedColumnJava = (Properties) this.mapReplacedColumnJava.clone(); + } + return other; } catch (CloneNotSupportedException cnse) { // Shouldn't happen. @@ -1064,6 +1072,9 @@ public class SqoopOptions implements Cloneable { // set default metadata transaction isolation level to TRANSACTION_READ_COMMITTED this.metadataTransactionIsolationLevel = Connection.TRANSACTION_READ_COMMITTED; + + // set escape column mapping to true + this.escapeColumnMappingEnabled = true; } /** @@ -2734,4 +2745,38 @@ public class SqoopOptions implements Cloneable { getConf().setBoolean(ORACLE_ESCAPING_DISABLED, escapingDisabled); } + public void setEscapeMappingColumnNamesEnabled(boolean escapingEnabled) { + this.escapeColumnMappingEnabled = escapingEnabled; + // important to have custom setter to ensure option is available through + // Hadoop configuration on those places where SqoopOptions is not reachable + getConf().setBoolean(BaseSqoopTool.ESCAPE_MAPPING_COLUMN_NAMES_ENABLED, escapingEnabled); + } + + public boolean getEscapeMappingColumnNamesEnabled() { + return escapeColumnMappingEnabled; + } + + public Properties getColumnNames() { + if (escapeColumnMappingEnabled && null == mapReplacedColumnJava) { + return doCleanColumnMapping(); + } + return escapeColumnMappingEnabled ? mapReplacedColumnJava : mapColumnJava; + } + + private Properties doCleanColumnMapping() { + mapReplacedColumnJava = new Properties(); + + if (!mapColumnJava.isEmpty()) { + for (Map.Entry<Object, Object> entry : mapColumnJava.entrySet()) { + String candidate = toJavaIdentifier((String)entry.getKey()); + mapReplacedColumnJava.put(candidate, mapColumnJava.getProperty((String)entry.getKey())); + } + return mapReplacedColumnJava; + } + + return mapColumnJava; + } + + } + http://git-wip-us.apache.org/repos/asf/sqoop/blob/e280b47e/src/java/org/apache/sqoop/orm/ClassWriter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java index c18a36f..eaa9123 100644 --- a/src/java/org/apache/sqoop/orm/ClassWriter.java +++ b/src/java/org/apache/sqoop/orm/ClassWriter.java @@ -39,18 +39,18 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.sqoop.mapreduce.ImportJobBase; import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.manager.ConnManager; import com.cloudera.sqoop.lib.BigDecimalSerializer; +import com.cloudera.sqoop.lib.BlobRef; import com.cloudera.sqoop.lib.BooleanParser; +import com.cloudera.sqoop.lib.ClobRef; import com.cloudera.sqoop.lib.DelimiterSet; import com.cloudera.sqoop.lib.FieldFormatter; import com.cloudera.sqoop.lib.JdbcWritableBridge; import com.cloudera.sqoop.lib.LargeObjectLoader; import com.cloudera.sqoop.lib.LobSerializer; import com.cloudera.sqoop.lib.RecordParser; -import com.cloudera.sqoop.lib.BlobRef; -import com.cloudera.sqoop.lib.ClobRef; import com.cloudera.sqoop.lib.SqoopRecord; +import com.cloudera.sqoop.manager.ConnManager; /** * Creates an ORM class to represent a table from a database. @@ -289,10 +289,9 @@ public class ClassWriter { } private String toJavaType(String columnName, int sqlType) { - Properties mapping = options.getMapColumnJava(); - - if (mapping.containsKey(columnName)) { - String type = mapping.getProperty(columnName); + Properties columnMapping = options.getColumnNames(); + if (null != columnMapping && columnMapping.containsKey(columnName)) { + String type = (String) columnMapping.get(columnName); if (LOG.isDebugEnabled()) { LOG.info("Overriding type of column " + columnName + " to " + type); } @@ -1705,7 +1704,8 @@ public class ClassWriter { } // Check that all explicitly mapped columns are present in result set - Properties mapping = options.getMapColumnJava(); + Properties mapping = options.getColumnNames(); + if (mapping != null && !mapping.isEmpty()) { for(Object column : mapping.keySet()) { if (!uniqColNames.contains((String)column)) { http://git-wip-us.apache.org/repos/asf/sqoop/blob/e280b47e/src/java/org/apache/sqoop/tool/BaseSqoopTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java index 3d37859..46f405f 100644 --- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java +++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java @@ -172,6 +172,7 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { public static final String RELAXED_ISOLATION = "relaxed-isolation"; public static final String THROW_ON_ERROR_ARG = "throw-on-error"; public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled"; + public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names"; // Arguments for validation. public static final String VALIDATE_ARG = "validate"; @@ -787,6 +788,12 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { .withDescription("Override mapping for specific columns to java types") .withLongOpt(MAP_COLUMN_JAVA) .create()); + codeGenOpts.addOption(OptionBuilder + .hasArg() + .withDescription("Disable special characters escaping in column names") + .withLongOpt(ESCAPE_MAPPING_COLUMN_NAMES_ENABLED) + .withArgName("boolean") + .create()); if (!multiTable) { codeGenOpts.addOption(OptionBuilder.withArgName("name") @@ -1084,6 +1091,11 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { if (in.hasOption(ORACLE_ESCAPING_DISABLED)) { out.setOracleEscapingDisabled(Boolean.parseBoolean(in.getOptionValue(ORACLE_ESCAPING_DISABLED))); } + + if (in.hasOption(ESCAPE_MAPPING_COLUMN_NAMES_ENABLED)) { + out.setEscapeMappingColumnNamesEnabled(Boolean.parseBoolean(in.getOptionValue( + ESCAPE_MAPPING_COLUMN_NAMES_ENABLED))); + } } private void applyCredentialsOptions(CommandLine in, SqoopOptions out) @@ -1355,6 +1367,11 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { if (!multiTable && in.hasOption(CLASS_NAME_ARG)) { out.setClassName(in.getOptionValue(CLASS_NAME_ARG)); } + + if (in.hasOption(ESCAPE_MAPPING_COLUMN_NAMES_ENABLED)) { + out.setEscapeMappingColumnNamesEnabled(Boolean.parseBoolean(in.getOptionValue( + ESCAPE_MAPPING_COLUMN_NAMES_ENABLED))); + } } protected void applyHBaseOptions(CommandLine in, SqoopOptions out) { http://git-wip-us.apache.org/repos/asf/sqoop/blob/e280b47e/src/java/org/apache/sqoop/tool/ImportTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/tool/ImportTool.java b/src/java/org/apache/sqoop/tool/ImportTool.java index d1c9749..4b1b12d 100644 --- a/src/java/org/apache/sqoop/tool/ImportTool.java +++ b/src/java/org/apache/sqoop/tool/ImportTool.java @@ -28,18 +28,15 @@ import java.sql.Types; import java.util.List; import java.util.Map; -import com.cloudera.sqoop.mapreduce.MergeJob; -import com.cloudera.sqoop.orm.TableClassName; -import com.cloudera.sqoop.util.ClassLoaderStack; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; - -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; +import org.apache.sqoop.avro.AvroSchemaMismatchException; import com.cloudera.sqoop.SqoopOptions; import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; @@ -47,12 +44,14 @@ import com.cloudera.sqoop.cli.RelatedOptions; import com.cloudera.sqoop.cli.ToolOptions; import com.cloudera.sqoop.hive.HiveImport; import com.cloudera.sqoop.manager.ImportJobContext; +import com.cloudera.sqoop.mapreduce.MergeJob; import com.cloudera.sqoop.metastore.JobData; import com.cloudera.sqoop.metastore.JobStorage; import com.cloudera.sqoop.metastore.JobStorageFactory; +import com.cloudera.sqoop.orm.TableClassName; import com.cloudera.sqoop.util.AppendUtils; +import com.cloudera.sqoop.util.ClassLoaderStack; import com.cloudera.sqoop.util.ImportException; -import org.apache.sqoop.avro.AvroSchemaMismatchException; import static org.apache.sqoop.manager.SupportedManagers.MYSQL; @@ -1007,6 +1006,11 @@ public class ImportTool extends com.cloudera.sqoop.tool.BaseSqoopTool { out.setAutoResetToOneMapper(true); } + if (in.hasOption(ESCAPE_MAPPING_COLUMN_NAMES_ENABLED)) { + out.setEscapeMappingColumnNamesEnabled(Boolean.parseBoolean(in.getOptionValue( + ESCAPE_MAPPING_COLUMN_NAMES_ENABLED))); + } + applyIncrementalOptions(in, out); applyHiveOptions(in, out); applyOutputFormatOptions(in, out); http://git-wip-us.apache.org/repos/asf/sqoop/blob/e280b47e/src/test/com/cloudera/sqoop/TestAvroImport.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/TestAvroImport.java b/src/test/com/cloudera/sqoop/TestAvroImport.java index 26edd4c..da79c7a 100644 --- a/src/test/com/cloudera/sqoop/TestAvroImport.java +++ b/src/test/com/cloudera/sqoop/TestAvroImport.java @@ -48,6 +48,7 @@ import com.cloudera.sqoop.testutil.ImportJobTestCase; import org.junit.Test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -317,7 +318,49 @@ public class TestAvroImport extends ImportJobTestCase { GenericRecord record1 = reader.next(); assertNull(record1.get("DATA_COL0")); + } + + @Test + public void testSpecialCharactersInColumnMappingWithConvertion() throws IOException, SQLException { + // escaping enabled by default + String [] extraArgsEscapeColNamesWithMapping = { "--map-column-java", + "INTFIELD1=String,DATA_#_COL0=String,DATA#COL1=String,DATA___COL2=String"}; + + // disable escaping + String [] extraArgsEscapingDisables = {"--escape-mapping-column-names", "false"}; + + // escaping enabled but mapping not provided + String [] extraArgsEscapingWithoutMapping = {}; + + checkRecordWithExtraArgs(extraArgsEscapeColNamesWithMapping, "TABLE1"); + checkRecordWithExtraArgs(extraArgsEscapingDisables, "TABLE2"); + checkRecordWithExtraArgs(extraArgsEscapingWithoutMapping, "TABLE3"); + } + + private void checkRecordWithExtraArgs(String[] extraArgs, String tableName) throws IOException { + String date = "2017-01-19"; + String timeStamp = "2017-01-19 14:47:57.112000"; + + String [] names = {"INTFIELD1", "DATA_#_COL0", "DATA#COL1", "DATA___COL2"}; + String [] types = { "INT", "DATE", "TIMESTAMP", "DECIMAL(2,20)" }; + String [] vals = {"1", "{ts \'" + date + "\'}", "{ts \'" + timeStamp + "\'}", "2e20"}; + + String [] checkNames = {"INTFIELD1", "DATA___COL0", "DATA_COL1", "DATA___COL2"}; + + setCurTableName(tableName); + + createTableWithColTypesAndNames(names, types, vals); + runImport(getOutputArgv(true, extraArgs)); + + Path outputFile = new Path(getTablePath(), "part-m-00000.avro"); + DataFileReader<GenericRecord> reader = read(outputFile); + GenericRecord record = reader.next(); + + for (String columnName : checkNames) { + assertNotNull(record.get(columnName)); + } + removeTableDir(); } protected DataFileReader<GenericRecord> read(Path filename) throws IOException { http://git-wip-us.apache.org/repos/asf/sqoop/blob/e280b47e/src/test/com/cloudera/sqoop/TestSqoopOptions.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/TestSqoopOptions.java b/src/test/com/cloudera/sqoop/TestSqoopOptions.java index 0b33b24..dbdd2f1 100644 --- a/src/test/com/cloudera/sqoop/TestSqoopOptions.java +++ b/src/test/com/cloudera/sqoop/TestSqoopOptions.java @@ -20,21 +20,18 @@ package com.cloudera.sqoop; import java.util.Properties; -import com.cloudera.sqoop.tool.BaseSqoopTool; - import org.apache.commons.lang.ArrayUtils; import org.apache.sqoop.manager.oracle.OracleUtils; - -import com.cloudera.sqoop.lib.DelimiterSet; -import com.cloudera.sqoop.tool.ImportTool; -import com.cloudera.sqoop.testutil.HsqldbTestServer; -import org.junit.Before; import org.junit.After; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; + +import com.cloudera.sqoop.lib.DelimiterSet; +import com.cloudera.sqoop.testutil.HsqldbTestServer; +import com.cloudera.sqoop.tool.BaseSqoopTool; +import com.cloudera.sqoop.tool.ImportTool; import static org.apache.sqoop.Sqoop.SQOOP_RETHROW_PROPERTY; import static org.hamcrest.CoreMatchers.equalTo; @@ -782,4 +779,19 @@ public class TestSqoopOptions { validateImportOptions(extraArgs); } + @Test + public void testEscapeMapingColumnNames() throws Exception { + SqoopOptions opts = new SqoopOptions(); + // enabled by default + assertTrue(opts.getEscapeMappingColumnNamesEnabled()); + + String [] args = { + "--" + org.apache.sqoop.tool.BaseSqoopTool.ESCAPE_MAPPING_COLUMN_NAMES_ENABLED, + "false", + }; + + opts = parse(args); + assertFalse(opts.getEscapeMappingColumnNamesEnabled()); + } + }
