Repository: sqoop
Updated Branches:
  refs/heads/branch-1.4.6 08c61a11e -> 67e8c9a9e
SQOOP-2294: Change to Avro schema name breaks some use cases
(Qian Xu via Jarek Jarcec Cecho)

Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/67e8c9a9
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/67e8c9a9
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/67e8c9a9

Branch: refs/heads/branch-1.4.6
Commit: 67e8c9a9e8048dee227682e3fc76aedae1d100de
Parents: 08c61a1
Author: Jarek Jarcec Cecho <[email protected]>
Authored: Tue Apr 21 07:02:37 2015 -0700
Committer: Jarek Jarcec Cecho <[email protected]>
Committed: Tue Apr 21 08:37:37 2015 -0700

----------------------------------------------------------------------
 .../sqoop/mapreduce/DataDrivenImportJob.java | 19 +++++++++++--------
 .../apache/sqoop/orm/AvroSchemaGenerator.java | 5 +++--
 src/java/org/apache/sqoop/tool/CodeGenTool.java | 11 +++++++++++
 3 files changed, 25 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/sqoop/blob/67e8c9a9/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java b/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
index e70d23c..d5bfae2 100644
--- a/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
+++ b/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
@@ -23,8 +23,6 @@ import java.io.IOException; import java.sql.SQLException; import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory;
@@ -88,7 +86,8 @@ public class DataDrivenImportJob extends ImportJobBase { job.setOutputValueClass(NullWritable.class); } else if (options.getFileLayout() ==
SqoopOptions.FileLayout.AvroDataFile) { - Schema schema = generateAvroSchema(tableName); + final String schemaNameOverride = null; + Schema schema = generateAvroSchema(tableName, schemaNameOverride); try { writeAvroSchema(schema); } catch (final IOException e) { @@ -99,9 +98,12 @@ public class DataDrivenImportJob extends ImportJobBase { } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) { Configuration conf = job.getConfiguration(); - // An Avro schema is required for creating a dataset that manages - // Parquet data records. The import will fail, if schema is invalid. - Schema schema = generateAvroSchema(tableName); + // Kite SDK requires an Avro schema to represent the data structure of + // target dataset. If the schema name equals to generated java class name, + // the import will fail. So we use table name as schema name and add a + // prefix "codegen_" to generated java class to avoid the conflict. + final String schemaNameOverride = tableName; + Schema schema = generateAvroSchema(tableName, schemaNameOverride); String uri = getKiteUri(conf, tableName); ParquetJob.configureImportJob(conf, schema, uri, options.isAppendMode(), options.doHiveImport() && options.doOverwriteHiveTable()); @@ -123,11 +125,12 @@ public class DataDrivenImportJob extends ImportJobBase { } } - private Schema generateAvroSchema(String tableName) throws IOException { + private Schema generateAvroSchema(String tableName, + String schemaNameOverride) throws IOException { ConnManager connManager = getContext().getConnManager(); AvroSchemaGenerator generator = new AvroSchemaGenerator(options, connManager, tableName); - return generator.generate(); + return generator.generate(schemaNameOverride); } private void writeAvroSchema(final Schema schema) throws IOException { http://git-wip-us.apache.org/repos/asf/sqoop/blob/67e8c9a9/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java ---------------------------------------------------------------------- diff --git 
a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java index ed8e8b1..2576673 100644 --- a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java +++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java @@ -55,7 +55,7 @@ public class AvroSchemaGenerator { this.tableName = table; } - public Schema generate() throws IOException { + public Schema generate(String schemaNameOverride) throws IOException { ClassWriter classWriter = new ClassWriter(options, connManager, tableName, null); Map<String, Integer> columnTypes = classWriter.getColumnTypes(); @@ -75,7 +75,8 @@ public class AvroSchemaGenerator { TableClassName tableClassName = new TableClassName(options); String shortClassName = tableClassName.getShortClassForTable(tableName); String avroTableName = (tableName == null ? TableClassName.QUERY_RESULT : tableName); - String avroName = "sqoop_import_" + (shortClassName == null ? avroTableName : shortClassName); + String avroName = schemaNameOverride != null ? schemaNameOverride : + (shortClassName == null ? avroTableName : shortClassName); String avroNamespace = tableClassName.getPackageForTable(); String doc = "Sqoop import of " + avroTableName; http://git-wip-us.apache.org/repos/asf/sqoop/blob/67e8c9a9/src/java/org/apache/sqoop/tool/CodeGenTool.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/tool/CodeGenTool.java b/src/java/org/apache/sqoop/tool/CodeGenTool.java index 6bd7f1d..ab339ad 100644 --- a/src/java/org/apache/sqoop/tool/CodeGenTool.java +++ b/src/java/org/apache/sqoop/tool/CodeGenTool.java @@ -90,6 +90,17 @@ public class CodeGenTool extends com.cloudera.sqoop.tool.BaseSqoopTool { return null; } LOG.info("Beginning code generation"); + + if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) { + String className = options.getClassName() != null ? 
+ options.getClassName() : options.getTableName(); + if (className.equalsIgnoreCase(options.getTableName())) { + className = "codegen_" + className; + options.setClassName(className); + LOG.info("Will generate java class as " + options.getClassName()); + } + } + CompilationManager compileMgr = new CompilationManager(options); ClassWriter classWriter = new ClassWriter(options, manager, tableName, compileMgr);
