Updated Branches: refs/heads/trunk 666d5499a -> a220ae469
SQOOP-654: PostgreSQL direct connector is ignoring --null(-input)string and --null(-input)-non-string arguments (Jarek Jarcec Cecho via Cheolsoo Park) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/a220ae46 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/a220ae46 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/a220ae46 Branch: refs/heads/trunk Commit: a220ae46948bd388da728b3816d817b3cae31ce4 Parents: 666d549 Author: Cheolsoo Park <[email protected]> Authored: Wed Feb 6 14:18:23 2013 -0800 Committer: Cheolsoo Park <[email protected]> Committed: Wed Feb 6 14:18:23 2013 -0800 ---------------------------------------------------------------------- .../sqoop/manager/DirectPostgresqlManager.java | 20 ++++- .../org/apache/sqoop/util/SubstitutionUtils.java | 74 +++++++++++++++ .../sqoop/manager/PostgresqlImportTest.java | 31 +++++- .../cloudera/sqoop/util/TestSubstitutionUtils.java | 38 ++++++++ 4 files changed, 158 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java b/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java index dba732c..a05bf60 100644 --- a/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java +++ b/src/java/org/apache/sqoop/manager/DirectPostgresqlManager.java @@ -30,6 +30,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sqoop.util.PostgreSQLUtils; @@ -45,6 +46,7 @@ import com.cloudera.sqoop.util.ImportException; import com.cloudera.sqoop.util.JdbcUrl; import com.cloudera.sqoop.util.LoggingAsyncSink; import com.cloudera.sqoop.util.PerfCounters; +import org.apache.sqoop.util.SubstitutionUtils; /** * Manages direct dumps from Postgresql databases via psql COPY TO STDOUT @@ -257,6 +259,14 @@ public class DirectPostgresqlManager sb.append(" TO STDOUT WITH DELIMITER E'\\"); sb.append(Integer.toString((int) this.options.getOutputFieldDelim(), 8)); sb.append("' CSV "); + + if (this.options.getNullStringValue() != null) { + sb.append("NULL AS E'"); + sb.append(SubstitutionUtils.removeEscapeCharacters( + this.options.getNullStringValue())); + sb.append("' "); + } + if (this.options.getOutputEnclosedBy() != '\0') { sb.append("QUOTE E'\\"); sb.append(Integer.toString((int) this.options.getOutputEnclosedBy(), 8)); @@ -277,7 +287,7 @@ public class DirectPostgresqlManager sb.append(";"); String copyCmd = sb.toString(); - LOG.debug("Copy command is " + copyCmd); + LOG.info("Copy command is " + copyCmd); return copyCmd; } @@ -317,6 +327,14 @@ public class DirectPostgresqlManager LOG.warn("Postgresql direct import; import will proceed as text files."); } + if (!StringUtils.equals(options.getNullStringValue(), + options.getNullNonStringValue())) { + throw new ImportException( + "Detected different values of --input-string and --input-non-string " + + "parameters. PostgreSQL direct manager do not support that. Please " + + "either use the same values or omit the --direct parameter."); + } + String commandFilename = null; String passwordFilename = null; Process p = null; http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/java/org/apache/sqoop/util/SubstitutionUtils.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/util/SubstitutionUtils.java b/src/java/org/apache/sqoop/util/SubstitutionUtils.java new file mode 100644 index 0000000..c1a468d --- /dev/null +++ b/src/java/org/apache/sqoop/util/SubstitutionUtils.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.util; + +import java.util.HashMap; +import java.util.Map; + +/** + * Utility class for null substitution character handling. + * + */ +public final class SubstitutionUtils { + + // List of items that needs to be de-escaped in order to be consistent with + // Sqoop interpretation of the NULL string parameters. + private static final Map<String, String> REMOVE_ESCAPE_CHARS; + + static { + // Build static map of escape characters that needs to be de-escaped. + // http://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.6 + REMOVE_ESCAPE_CHARS = new HashMap<String, String>(); + REMOVE_ESCAPE_CHARS.put("\\\\b", "\b"); + REMOVE_ESCAPE_CHARS.put("\\\\t", "\t"); + REMOVE_ESCAPE_CHARS.put("\\\\n", "\n"); + REMOVE_ESCAPE_CHARS.put("\\\\f", "\f"); + REMOVE_ESCAPE_CHARS.put("\\\\'", "'"); + REMOVE_ESCAPE_CHARS.put("\\\\\"", "\""); + REMOVE_ESCAPE_CHARS.put("\\\\\\\\", "\\\\"); + // TODO(jarcec, optional): Deal with octal escape sequences? + } + + /** + * De-escape all escape sequences presented in the string. + * + * Sqoop is historically using --(input)-null-(non-)string parameters directly + * in generated code and thus they need to be manually escaped on command + * line. However some connectors might need their final form, so that they + * can be also used outside generated code. + * + * @param string String to de-escape + * @return String without escape sequences + */ + public static String removeEscapeCharacters(String string) { + + // Our de-escaping is not suporting octal escape sequences + if (string.matches("\\\\[0-9]+")) { + throw new RuntimeException("Octal escape sequence is not supported"); + } + + for (Map.Entry<String, String> entry : REMOVE_ESCAPE_CHARS.entrySet()) { + string = string.replaceAll(entry.getKey(), entry.getValue()); + } + return string; + } + + private SubstitutionUtils() { + // This class can't be instantiated + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java b/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java index 512b1d5..ee00c41 100644 --- a/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java +++ b/src/test/com/cloudera/sqoop/manager/PostgresqlImportTest.java @@ -92,6 +92,7 @@ public class PostgresqlImportTest extends ImportJobTestCase { static final String DATABASE_USER = "sqooptest"; static final String DATABASE_NAME = "sqooptest"; static final String TABLE_NAME = "EMPLOYEES_PG"; + static final String NULL_TABLE_NAME = "NULL_EMPLOYEES_PG"; static final String SPECIAL_TABLE_NAME = "EMPLOYEES_PG's"; static final String DIFFERENT_TABLE_NAME = "DIFFERENT_TABLE"; static final String SCHEMA_PUBLIC = "public"; @@ -109,14 +110,15 @@ public class PostgresqlImportTest extends ImportJobTestCase { LOG.debug("Setting up another postgresql test: " + CONNECT_STRING); - setUpData(TABLE_NAME, SCHEMA_PUBLIC); - setUpData(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC); - setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL); + setUpData(TABLE_NAME, SCHEMA_PUBLIC, false); + setUpData(NULL_TABLE_NAME, SCHEMA_PUBLIC, true); + setUpData(SPECIAL_TABLE_NAME, SCHEMA_PUBLIC, false); + setUpData(DIFFERENT_TABLE_NAME, SCHEMA_SPECIAL, false); LOG.debug("setUp complete."); } - public void setUpData(String tableName, String schema) { + public void setUpData(String tableName, String schema, boolean nullEntry) { SqoopOptions options = new SqoopOptions(CONNECT_STRING, tableName); options.setUsername(DATABASE_USER); @@ -169,6 +171,11 @@ public class PostgresqlImportTest extends ImportJobTestCase { + " VALUES(2,'Bob','2009-04-20',400.00,'sales')"); st.executeUpdate("INSERT INTO " + fullTableName + " VALUES(3,'Fred','2009-01-23',15.00,'marketing')"); + if (nullEntry) { + st.executeUpdate("INSERT INTO " + fullTableName + + " VALUES(4,'Mike',NULL,NULL,NULL)"); + + } connection.commit(); } catch (SQLException sqlE) { LOG.error("Encountered SQL Exception: " + sqlE); @@ -347,4 +354,20 @@ public class PostgresqlImportTest extends ImportJobTestCase { doImportAndVerify(true, expectedResults, DIFFERENT_TABLE_NAME, extraArgs); } + + @Test + public void testNullEscapeCharacters() throws Exception { + String [] expectedResults = { + "2,Bob,2009-04-20,400,sales", + "3,Fred,2009-01-23,15,marketing", + "4,Mike,\\N,\\N,\\N", + }; + + String [] extraArgs = { + "--null-string", "\\\\\\\\N", + "--null-non-string", "\\\\\\\\N", + }; + + doImportAndVerify(true, expectedResults, NULL_TABLE_NAME, extraArgs); + } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/a220ae46/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java b/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java new file mode 100644 index 0000000..cd13adc --- /dev/null +++ b/src/test/com/cloudera/sqoop/util/TestSubstitutionUtils.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.cloudera.sqoop.util; + +import junit.framework.TestCase; +import org.apache.sqoop.util.SubstitutionUtils; + +/** + * + */ +public class TestSubstitutionUtils extends TestCase { + + public void testRemoveEscapeCharacters() { + assertEquals("\\N", SubstitutionUtils.removeEscapeCharacters("\\\\N")); + assertEquals("\n", SubstitutionUtils.removeEscapeCharacters("\\n")); + assertEquals("\b", SubstitutionUtils.removeEscapeCharacters("\\b")); + assertEquals("\t", SubstitutionUtils.removeEscapeCharacters("\\t")); + assertEquals("\f", SubstitutionUtils.removeEscapeCharacters("\\f")); + assertEquals("\'", SubstitutionUtils.removeEscapeCharacters("\\'")); + assertEquals("\"", SubstitutionUtils.removeEscapeCharacters("\\\"")); + assertEquals("sqoop", SubstitutionUtils.removeEscapeCharacters("sqoop")); + } +}
