Repository: sqoop Updated Branches: refs/heads/trunk 83f0c1a40 -> 28bbe4d46
Revert "SQOOP-2920: sqoop performance deteriorates significantly on wide datasets; sqoop 100% on cpu" I've mistakenly committed SQOOP-2920 and SQOOP-2906 inside this commit, so I'll revert it and commit them separately. Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/ac217a03 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/ac217a03 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/ac217a03 Branch: refs/heads/trunk Commit: ac217a032c0755edac713ff53b3f55b7f2f46706 Parents: 83f0c1a Author: Jarek Jarcec Cecho <[email protected]> Authored: Thu May 19 07:26:12 2016 -0700 Committer: Jarek Jarcec Cecho <[email protected]> Committed: Thu May 19 07:26:12 2016 -0700 ---------------------------------------------------------------------- src/java/org/apache/sqoop/avro/AvroUtil.java | 22 +----- src/java/org/apache/sqoop/orm/ClassWriter.java | 83 ++++++++++---------- .../apache/sqoop/orm/CompilationManager.java | 8 +- .../com/cloudera/sqoop/orm/TestClassWriter.java | 63 +-------------- 4 files changed, 51 insertions(+), 125 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/ac217a03/src/java/org/apache/sqoop/avro/AvroUtil.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/avro/AvroUtil.java b/src/java/org/apache/sqoop/avro/AvroUtil.java index ee29f14..319be0f 100644 --- a/src/java/org/apache/sqoop/avro/AvroUtil.java +++ b/src/java/org/apache/sqoop/avro/AvroUtil.java @@ -114,25 +114,11 @@ public final class AvroUtil { * Format candidate to avro specifics */ public static String toAvroIdentifier(String candidate) { - char[] data = candidate.toCharArray(); - boolean skip = false; - int stringIndex = 0; - - for (char c:data) { - if (Character.isLetterOrDigit(c) || c == '_') { - data[stringIndex++] = c; - skip = false; - } else if(!skip) { - data[stringIndex++] = '_'; - skip = true; - } - } - - char initial = data[0]; - if (Character.isLetter(initial) || initial == '_') { - return new String(data, 0, stringIndex); + String formattedCandidate = candidate.replaceAll("\\W+", "_"); + if (formattedCandidate.substring(0,1).matches("[a-zA-Z_]")) { + return formattedCandidate; } else { - return "AVRO_".concat(new String(data, 0, stringIndex)); + return "AVRO_" + formattedCandidate; } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/ac217a03/src/java/org/apache/sqoop/orm/ClassWriter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java index 9d91887..23a9c41 100644 --- a/src/java/org/apache/sqoop/orm/ClassWriter.java +++ b/src/java/org/apache/sqoop/orm/ClassWriter.java @@ -1064,49 +1064,57 @@ public class ClassWriter { * @param colNames - ordered list of column names for table. * @param sb - StringBuilder to append code to */ - private void generateSetField(Map<String, Integer> columnTypes, String[] colNames, String[] rawColNames, - StringBuilder sb) { - String sep = System.getProperty("line.separator"); - sb.append(" public void setField(String __fieldName, Object __fieldVal) " + "{" + sep); - sb.append(" if (!setters.containsKey(__fieldName)) {" + sep); - sb.append(" throw new RuntimeException(\"No such field:\"+__fieldName);" + sep); - sb.append(" }" + sep); - sb.append(" setters.get(__fieldName).setField(__fieldVal);" + sep); - sb.append(" }\n" + sep); - } + private void generateSetField(Map<String, Integer> columnTypes, + String [] colNames, String [] rawColNames, StringBuilder sb) { + + int numberOfMethods = + this.getNumberOfMethods(colNames, maxColumnsPerMethod); - private void generateConstructorAndInitMethods(Map<String, Integer> colTypes, String[] colNames, String[] rawColNames, - String typeName, StringBuilder sb) { - String sep = System.getProperty("line.separator"); - int numberOfMethods = getNumberOfMethods(colNames, maxColumnsPerMethod); - for (int methodNumber = 0; methodNumber < numberOfMethods; ++methodNumber) { - sb.append(" private void init" + methodNumber + "() {" + sep); - for (int i = methodNumber * maxColumnsPerMethod; i < topBoundary(colNames, methodNumber, - maxColumnsPerMethod); ++i) { - String colName = colNames[i]; - String rawColName = rawColNames[i]; - int sqlType = colTypes.get(colName); - String javaType = toJavaType(colName, sqlType); + sb.append(" public void setField(String __fieldName, Object __fieldVal) " + + "{\n"); + if (numberOfMethods > 1) { + boolean first = true; + for (int i = 0; i < numberOfMethods; ++i) { + if (!first) { + sb.append(" else"); + } + sb.append(" if (this.setField" + i + + "(__fieldName, __fieldVal)) {\n"); + sb.append(" return;\n"); + sb.append(" }\n"); + first = false; + } + } else { + boolean first = true; + for (int i = 0; i < colNames.length; i++) { + int sqlType = columnTypes.get(colNames[i]); + String javaType = toJavaType(colNames[i], sqlType); if (null == javaType) { - LOG.error("Cannot resolve SQL type " + sqlType); continue; } else { - sb.append(" setters.put(\"" + serializeRawColName(rawColName) + "\", new FieldSetterCommand() {" + sep); - sb.append(" @Override" + sep); - sb.append(" public void setField(Object value) {" + sep); - sb.append(" " + colName + " = (" + javaType + ")value;" + sep); - sb.append(" }" + sep); - sb.append(" });" + sep); + if (!first) { + sb.append(" else"); + } + + sb.append(" if (\"" + serializeRawColName(rawColNames[i]) + "\".equals(__fieldName)) {\n"); + sb.append(" this." + colNames[i] + " = (" + javaType + + ") __fieldVal;\n"); + sb.append(" }\n"); + first = false; } } - sb.append(" }" + sep); } - sb.append(" public " + typeName + "() {" + sep); + sb.append(" else {\n"); + sb.append(" throw new RuntimeException("); + sb.append("\"No such field: \" + __fieldName);\n"); + sb.append(" }\n"); + sb.append(" }\n"); + for (int i = 0; i < numberOfMethods; ++i) { - sb.append(" init" + i + "();" + sep); + myGenerateSetField(columnTypes, colNames, rawColNames, sb, i, maxColumnsPerMethod); } - sb.append(" }" + sep); } + /** * Raw column name is a column name as it was created on database and we need to serialize it between * double quotes into java class that will be further complied with javac. Various databases supports @@ -1176,7 +1184,7 @@ public class ClassWriter { sb.append(" public Map<String, Object> getFieldMap() {\n"); sb.append(" Map<String, Object> __sqoop$field_map = " - + "new HashMap<String, Object>();\n"); + + "new TreeMap<String, Object>();\n"); if (numberOfMethods > 1) { for (int i = 0; i < numberOfMethods; ++i) { sb.append(" this.getFieldMap" + i + "(__sqoop$field_map);\n"); @@ -1926,7 +1934,7 @@ public class ClassWriter { sb.append("import java.util.Iterator;\n"); sb.append("import java.util.List;\n"); sb.append("import java.util.Map;\n"); - sb.append("import java.util.HashMap;\n"); + sb.append("import java.util.TreeMap;\n"); sb.append("\n"); String className = tableNameInfo.getShortClassForTable(tableName); @@ -1936,12 +1944,7 @@ public class ClassWriter { + CLASS_WRITER_VERSION + ";\n"); sb.append( " public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n"); - sb.append(" public static interface FieldSetterCommand {"); - sb.append(" void setField(Object value);"); - sb.append(" }"); sb.append(" protected ResultSet __cur_result_set;\n"); - sb.append(" private Map<String, FieldSetterCommand> setters = new HashMap<String, FieldSetterCommand>();\n"); - generateConstructorAndInitMethods(columnTypes, colNames, rawColNames, className, sb); generateFields(columnTypes, colNames, className, sb); generateEquals(columnTypes, colNames, className, sb); generateDbRead(columnTypes, colNames, sb); http://git-wip-us.apache.org/repos/asf/sqoop/blob/ac217a03/src/java/org/apache/sqoop/orm/CompilationManager.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/orm/CompilationManager.java b/src/java/org/apache/sqoop/orm/CompilationManager.java index 0a2a87f..ce165e8 100644 --- a/src/java/org/apache/sqoop/orm/CompilationManager.java +++ b/src/java/org/apache/sqoop/orm/CompilationManager.java @@ -296,15 +296,11 @@ public class CompilationManager { // we only record the subdir parts in the zip entry. String fullPath = entry.getAbsolutePath(); String chompedPath = fullPath.substring(baseDirName.length()); - int indexOfDollarSign = chompedPath.indexOf("$"); - String innerTypesChompedPath = chompedPath - .substring(0, indexOfDollarSign == -1 ? chompedPath.length() : indexOfDollarSign); boolean include = chompedPath.endsWith(".class") - && (sources.contains( + && sources.contains( chompedPath.substring(0, chompedPath.length() - ".class".length()) - + ".java") - || sources.contains(innerTypesChompedPath + ".java")); + + ".java"); if (include) { // include this file. http://git-wip-us.apache.org/repos/asf/sqoop/blob/ac217a03/src/test/com/cloudera/sqoop/orm/TestClassWriter.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/orm/TestClassWriter.java b/src/test/com/cloudera/sqoop/orm/TestClassWriter.java index 10a0969..498db73 100644 --- a/src/test/com/cloudera/sqoop/orm/TestClassWriter.java +++ b/src/test/com/cloudera/sqoop/orm/TestClassWriter.java @@ -26,7 +26,6 @@ import java.lang.reflect.Method; import java.sql.Connection; import java.sql.Statement; import java.sql.SQLException; -import java.util.Random; import java.util.jar.JarEntry; import java.util.jar.JarInputStream; @@ -58,9 +57,6 @@ public class TestClassWriter extends TestCase { public static final Log LOG = LogFactory.getLog(TestClassWriter.class.getName()); - private static final String WIDE_TABLE_NAME = "WIDETABLE"; - private static final int WIDE_TABLE_COLUMN_COUNT = 800; - private static final int WIDE_TABLE_ROW_COUNT = 20_000; // instance variables populated during setUp, used during tests private HsqldbTestServer testServer; @@ -126,16 +122,12 @@ public class TestClassWriter extends TestCase { static final String JAR_GEN_DIR = ImportJobTestCase.TEMP_BASE_DIR + "sqoop/test/jargen"; - private File runGenerationTest(String[] argv, String classNameToCheck) { - return runGenerationTest(argv, classNameToCheck, HsqldbTestServer.getTableName()); - } - /** * Run a test to verify that we can generate code and it emits the output * files where we expect them. * @return */ - private File runGenerationTest(String[] argv, String classNameToCheck, String tableName) { + private File runGenerationTest(String [] argv, String classNameToCheck) { File codeGenDirFile = new File(CODE_GEN_DIR); File classGenDirFile = new File(JAR_GEN_DIR); @@ -148,7 +140,7 @@ public class TestClassWriter extends TestCase { CompilationManager compileMgr = new CompilationManager(options); ClassWriter writer = new ClassWriter(options, manager, - tableName, compileMgr); + HsqldbTestServer.getTableName(), compileMgr); try { writer.generate(); @@ -683,55 +675,4 @@ public class TestClassWriter extends TestCase { }; runFailedGenerationTest(argv, HsqldbTestServer.getTableName()); } - - @Test(timeout = 10000) - public void testWideTableClassGeneration() throws Exception { - createWideTable(); - options = new SqoopOptions(HsqldbTestServer.getDbUrl(), WIDE_TABLE_NAME); - - // Set the option strings in an "argv" to redirect our srcdir and bindir. - String [] argv = { - "--bindir", - JAR_GEN_DIR, - "--outdir", - CODE_GEN_DIR, - }; - - File ormJarFile = runGenerationTest(argv, WIDE_TABLE_NAME, WIDE_TABLE_NAME); - - ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(ormJarFile.getCanonicalPath(), - WIDE_TABLE_NAME); - Class tableClass = Class.forName(WIDE_TABLE_NAME, true, - Thread.currentThread().getContextClassLoader()); - - Object instance = tableClass.newInstance(); - Method setterMethod = tableClass.getMethod("setField", String.class, Object.class); - Random random = new Random(0); - for (int j = 0; j < WIDE_TABLE_ROW_COUNT; ++j) { - for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) { - setterMethod.invoke(instance, "INTFIELD" + i, random.nextInt()); - } - } - - if (null != prevClassLoader) { - ClassLoaderStack.setCurrentClassLoader(prevClassLoader); - } - } - - private void createWideTable() throws Exception { - try (Connection conn = testServer.getConnection(); Statement stmt = conn.createStatement();) { - stmt.executeUpdate("DROP TABLE \"" + WIDE_TABLE_NAME + "\" IF EXISTS"); - StringBuilder sb = new StringBuilder("CREATE TABLE \"" + WIDE_TABLE_NAME + "\" ("); - for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) { - sb.append("intField" + i + " INT"); - if (i < WIDE_TABLE_COLUMN_COUNT - 1) { - sb.append(","); - } else { - sb.append(")"); - } - } - stmt.executeUpdate(sb.toString()); - conn.commit(); - } - } }
