Repository: sqoop Updated Branches: refs/heads/trunk 2b70666b0 -> 5ccef5fd7
SQOOP-1904: Add support for DB2 XML data type (import use case) (Ying Cao via Attila Szabo) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5ccef5fd Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5ccef5fd Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5ccef5fd Branch: refs/heads/trunk Commit: 5ccef5fd7683685eed09c33c8847645608fab7b2 Parents: 2b70666 Author: Attila Szabo <[email protected]> Authored: Fri Feb 24 09:44:12 2017 +0100 Committer: Attila Szabo <[email protected]> Committed: Fri Feb 24 09:44:12 2017 +0100 ---------------------------------------------------------------------- .../org/apache/sqoop/manager/Db2Manager.java | 101 ++++++++ .../manager/db2/DB2XmlTypeImportManualTest.java | 249 +++++++++++++++++++ 2 files changed, 350 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/5ccef5fd/src/java/org/apache/sqoop/manager/Db2Manager.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/manager/Db2Manager.java b/src/java/org/apache/sqoop/manager/Db2Manager.java index e39aa4c..52ab05e 100644 --- a/src/java/org/apache/sqoop/manager/Db2Manager.java +++ b/src/java/org/apache/sqoop/manager/Db2Manager.java @@ -23,7 +23,9 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sqoop.mapreduce.db.Db2DataDrivenDBInputFormat; @@ -48,6 +50,11 @@ public class Db2Manager private static final String DRIVER_CLASS = "com.ibm.db2.jcc.DB2Driver"; + private static final String XML_TO_JAVA_DATA_TYPE = "String"; + + private Map<String, String> columnTypeNames; + + public Db2Manager(final SqoopOptions opts) { super(DRIVER_CLASS, 
opts); } @@ -123,4 +130,98 @@ public class Db2Manager return databases.toArray(new String[databases.size()]); } + + /** + * Return hive type for SQL type. + * + * @param tableName + * table name + * @param columnName + * column name + * @param sqlType + * sql data type + * @return hive type + */ + @Override + public String toHiveType(String tableName, String columnName, int sqlType) { + String hiveType = super.toHiveType(tableName, columnName, sqlType); + if (hiveType == null) { + hiveType = toDbSpecificHiveType(tableName, columnName); + } + return hiveType; + } + + /** + * Resolve a database-specific type to the Hive type that should contain it. + * + * @param tableName + * table name + * @param colName + * column name + * @return the name of a Hive type to hold the sql datatype, or null if + * none. + */ + private String toDbSpecificHiveType(String tableName, String colName) { + if (columnTypeNames == null) { + columnTypeNames = getColumnTypeNames(tableName, options.getCall(), + options.getSqlQuery()); + } + LOG.debug("database-specific Column Types and names returned = (" + + StringUtils.join(columnTypeNames.keySet(), ",") + ")=>(" + + StringUtils.join(columnTypeNames.values(), ",") + ")"); + + String colTypeName = columnTypeNames.get(colName); + + if (colTypeName != null) { + if (colTypeName.toUpperCase().startsWith("XML")) { + return XML_TO_JAVA_DATA_TYPE; + } + } + return null; + } + + /** + * Return java type for SQL type. + * + * @param tableName + * table name + * @param columnName + * column name + * @param sqlType + * sql type + * @return java type + */ + @Override + public String toJavaType(String tableName, String columnName, int sqlType) { + String javaType = super.toJavaType(tableName, columnName, sqlType); + if (javaType == null) { + javaType = toDbSpecificJavaType(tableName, columnName); + } + return javaType; + } + + /** + * Resolve a database-specific type to the Java type that should contain it. 
+ * + * @param tableName + * table name + * @param colName + * column name + * @return the name of a Java type to hold the sql datatype, or null if + * none. + */ + private String toDbSpecificJavaType(String tableName, String colName) { + if (columnTypeNames == null) { + columnTypeNames = getColumnTypeNames(tableName, options.getCall(), + options.getSqlQuery()); + } + String colTypeName = columnTypeNames.get(colName); + if (colTypeName != null) { + if (colTypeName.equalsIgnoreCase("XML")) { + return XML_TO_JAVA_DATA_TYPE; + } + } + return null; + } + } http://git-wip-us.apache.org/repos/asf/sqoop/blob/5ccef5fd/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java b/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java new file mode 100644 index 0000000..2ae3af8 --- /dev/null +++ b/src/test/org/apache/sqoop/manager/db2/DB2XmlTypeImportManualTest.java @@ -0,0 +1,249 @@ + /** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.sqoop.manager.db2; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.sqoop.manager.Db2Manager; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.testutil.CommonArgs; +import com.cloudera.sqoop.testutil.ImportJobTestCase; +import com.cloudera.sqoop.util.FileListing; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Test the DB2 XML data type. + * + * This uses JDBC to import data from a DB2 database into HDFS. + * + * Since this requires a DB2 Server installation, + * this class is named in such a way that Sqoop's default QA process does + * not run it. You need to run this manually with + * -Dtestcase=DB2XmlTypeImportManualTest + + * You need to put the DB2 JDBC driver library (db2jcc.jar) in a location + * where Sqoop will be able to access it (since this library cannot be checked + * into Apache's tree for licensing reasons). + * + * To set up your test environment: + * Install DB2 Express 9.7 C server. + * Create a database SQOOP + * Create a login SQOOP with password PASSWORD and grant all + * access for database SQOOP to user SQOOP. 
+ */ +public class DB2XmlTypeImportManualTest extends ImportJobTestCase { + + public static final Log LOG = LogFactory.getLog( + DB2XmlTypeImportManualTest.class.getName()); + + static final String HOST_URL = System.getProperty( + "sqoop.test.db2.connectstring.host_url", + "jdbc:db2://db2host:60000"); + + static final String DATABASE_NAME = System.getProperty( + "sqoop.test.db2.connectstring.database", + "SQOOP"); + static final String DATABASE_USER = System.getProperty( + "sqoop.test.db2.connectstring.username", + "SQOOP"); + static final String DATABASE_PASSWORD = System.getProperty( + "sqoop.test.db2.connectstring.password", + "SQOOP"); + + static final String TABLE_NAME = "COMPANY"; + static final String CONNECT_STRING = HOST_URL + + "/" + DATABASE_NAME; + static final String HIVE_TABLE_NAME = "HCOMPANY"; + static String ExpectedResults = + "1,doc1,<company name=\"Company1\"><emp id=\"31201\" salary=\"60000\" gender=\"Female\"><name><first>Laura </first><last>Brown</last></name><dept id=\"M25\">Finance</dept></emp></company>"; + + + static { + LOG.info("Using DB2 CONNECT_STRING HOST_URL is : "+HOST_URL); + LOG.info("Using DB2 CONNECT_STRING: " + CONNECT_STRING); + } + + // instance variables populated during setUp, used during tests + private Db2Manager manager; + + protected String getTableName() { + return TABLE_NAME; + } + + + @Before + public void setUp() { + super.setUp(); + + SqoopOptions options = new SqoopOptions(CONNECT_STRING, getTableName()); + options.setUsername(DATABASE_USER); + options.setPassword(DATABASE_PASSWORD); + + manager = new Db2Manager(options); + + // Drop the existing table, if there is one. 
+ Connection conn = null; + Statement stmt = null; + try { + conn = manager.getConnection(); + stmt = conn.createStatement(); + stmt.execute("DROP TABLE " + getTableName()); + } catch (SQLException sqlE) { + LOG.info("Table was not dropped: " + sqlE.getMessage()); + } finally { + try { + if (null != stmt) { + stmt.close(); + } + } catch (Exception ex) { + LOG.warn("Exception while closing stmt", ex); + } + } + + // Create and populate table + try { + conn = manager.getConnection(); + conn.setAutoCommit(false); + stmt = conn.createStatement(); + String xml ="xmlparse(document '<company name=\"Company1\">\n"+ + "<emp id=\"31201\" salary=\"60000\" gender=\"Female\">"+ + "<name><first>Laura </first><last>Brown</last></name>"+ + "<dept id=\"M25\">Finance</dept></emp></company>')"; + + + // create the database table and populate it with data. + stmt.executeUpdate("CREATE TABLE " + getTableName() + " (" + + "ID int, " + + "DOCNAME VARCHAR(20), " + + "DOC XML)"); + + stmt.executeUpdate("INSERT INTO " + getTableName() + " VALUES(" + + "1,'doc1', " + + xml + +" )"); + conn.commit(); + } catch (SQLException sqlE) { + LOG.error("Encountered SQL Exception: ", sqlE); + fail("SQLException when running test setUp(): " + sqlE); + } finally { + try { + if (null != stmt) { + stmt.close(); + } + } catch (Exception ex) { + LOG.warn("Exception while closing connection/stmt", ex); + } + } + } + + @After + public void tearDown() { + super.tearDown(); + try { + manager.close(); + } catch (SQLException sqlE) { + LOG.error("Got SQLException: " + sqlE); + } + } + + @Test + public void testDb2Import() throws IOException { + + runDb2Test(ExpectedResults); + + } + + private String [] getArgv() { + ArrayList<String> args = new ArrayList<String>(); + + CommonArgs.addHadoopFlags(args); + args.add("--connect"); + args.add(CONNECT_STRING); + args.add("--username"); + args.add(DATABASE_USER); + args.add("--password"); + args.add(DATABASE_PASSWORD); + + args.add("--table"); + args.add(TABLE_NAME); + 
args.add("--warehouse-dir"); + args.add(getWarehouseDir()); + args.add("--hive-table"); + args.add(HIVE_TABLE_NAME); + args.add("--num-mappers"); + args.add("1"); + + return args.toArray(new String[0]); + } + + private void runDb2Test(String expectedResults) throws IOException { + + Path warehousePath = new Path(this.getWarehouseDir()); + Path tablePath = new Path(warehousePath, getTableName()); + Path filePath = new Path(tablePath, "part-m-00000"); + + File tableFile = new File(tablePath.toString()); + if (tableFile.exists() && tableFile.isDirectory()) { + // remove the directory before running the import. + FileListing.recursiveDeleteDir(tableFile); + } + + String [] argv = getArgv(); + try { + runImport(argv); + LOG.info("finish runImport with argv is : "+argv); + } catch (IOException ioe) { + LOG.error("Got IOException during import: " + ioe); + fail(ioe.toString()); + } + + File f = new File(filePath.toString()); + assertTrue("Could not find imported data file", f.exists()); + BufferedReader r = null; + try { + // Read through the file and make sure it's all there. + r = new BufferedReader(new InputStreamReader(new FileInputStream(f))); + assertEquals(expectedResults, r.readLine()); + } catch (IOException ioe) { + LOG.error("Got IOException verifying results: " + ioe); + fail(ioe.toString()); + } finally { + IOUtils.closeStream(r); + } + } + +}
