Repository: sqoop
Updated Branches:
  refs/heads/trunk c109f6717 -> 4180d50c7


SQOOP-2294: Change to Avro schema name breaks some use cases

(Qian Xu via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/4180d50c
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/4180d50c
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/4180d50c

Branch: refs/heads/trunk
Commit: 4180d50c74713fbe635660572759b2cdb8603f80
Parents: c109f67
Author: Jarek Jarcec Cecho <[email protected]>
Authored: Tue Apr 21 07:02:37 2015 -0700
Committer: Jarek Jarcec Cecho <[email protected]>
Committed: Tue Apr 21 07:02:37 2015 -0700

----------------------------------------------------------------------
 .../sqoop/mapreduce/DataDrivenImportJob.java     | 19 +++++++++++--------
 .../apache/sqoop/orm/AvroSchemaGenerator.java    |  5 +++--
 src/java/org/apache/sqoop/tool/CodeGenTool.java  | 11 +++++++++++
 3 files changed, 25 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/4180d50c/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java b/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
index e70d23c..d5bfae2 100644
--- a/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
+++ b/src/java/org/apache/sqoop/mapreduce/DataDrivenImportJob.java
@@ -23,8 +23,6 @@ import java.io.IOException;
 import java.sql.SQLException;
 
 import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -88,7 +86,8 @@ public class DataDrivenImportJob extends ImportJobBase {
       job.setOutputValueClass(NullWritable.class);
     } else if (options.getFileLayout()
         == SqoopOptions.FileLayout.AvroDataFile) {
-      Schema schema = generateAvroSchema(tableName);
+      final String schemaNameOverride = null;
+      Schema schema = generateAvroSchema(tableName, schemaNameOverride);
       try {
         writeAvroSchema(schema);
       } catch (final IOException e) {
@@ -99,9 +98,12 @@ public class DataDrivenImportJob extends ImportJobBase {
     } else if (options.getFileLayout()
         == SqoopOptions.FileLayout.ParquetFile) {
       Configuration conf = job.getConfiguration();
-      // An Avro schema is required for creating a dataset that manages
-      // Parquet data records. The import will fail, if schema is invalid.
-      Schema schema = generateAvroSchema(tableName);
+      // Kite SDK requires an Avro schema to represent the data structure of
+      // target dataset. If the schema name equals to generated java class name,
+      // the import will fail. So we use table name as schema name and add a
+      // prefix "codegen_" to generated java class to avoid the conflict.
+      final String schemaNameOverride = tableName;
+      Schema schema = generateAvroSchema(tableName, schemaNameOverride);
       String uri = getKiteUri(conf, tableName);
       ParquetJob.configureImportJob(conf, schema, uri, options.isAppendMode(),
           options.doHiveImport() && options.doOverwriteHiveTable());
@@ -123,11 +125,12 @@ public class DataDrivenImportJob extends ImportJobBase {
     }
   }
 
-  private Schema generateAvroSchema(String tableName) throws IOException {
+  private Schema generateAvroSchema(String tableName,
+      String schemaNameOverride) throws IOException {
     ConnManager connManager = getContext().getConnManager();
     AvroSchemaGenerator generator = new AvroSchemaGenerator(options,
         connManager, tableName);
-    return generator.generate();
+    return generator.generate(schemaNameOverride);
   }
 
   private void writeAvroSchema(final Schema schema) throws IOException {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/4180d50c/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
index ed8e8b1..2576673 100644
--- a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
+++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java
@@ -55,7 +55,7 @@ public class AvroSchemaGenerator {
     this.tableName = table;
   }
 
-  public Schema generate() throws IOException {
+  public Schema generate(String schemaNameOverride) throws IOException {
     ClassWriter classWriter = new ClassWriter(options, connManager,
         tableName, null);
     Map<String, Integer> columnTypes = classWriter.getColumnTypes();
@@ -75,7 +75,8 @@ public class AvroSchemaGenerator {
     TableClassName tableClassName = new TableClassName(options);
     String shortClassName = tableClassName.getShortClassForTable(tableName);
     String avroTableName = (tableName == null ? TableClassName.QUERY_RESULT : tableName);
-    String avroName = "sqoop_import_" + (shortClassName == null ? avroTableName : shortClassName);
+    String avroName = schemaNameOverride != null ? schemaNameOverride :
+        (shortClassName == null ? avroTableName : shortClassName);
     String avroNamespace = tableClassName.getPackageForTable();
 
     String doc = "Sqoop import of " + avroTableName;

http://git-wip-us.apache.org/repos/asf/sqoop/blob/4180d50c/src/java/org/apache/sqoop/tool/CodeGenTool.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/tool/CodeGenTool.java b/src/java/org/apache/sqoop/tool/CodeGenTool.java
index 6bd7f1d..ab339ad 100644
--- a/src/java/org/apache/sqoop/tool/CodeGenTool.java
+++ b/src/java/org/apache/sqoop/tool/CodeGenTool.java
@@ -90,6 +90,17 @@ public class CodeGenTool extends com.cloudera.sqoop.tool.BaseSqoopTool {
       return null;
     }
     LOG.info("Beginning code generation");
+
+    if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
+      String className = options.getClassName() != null ?
+          options.getClassName() : options.getTableName();
+      if (className.equalsIgnoreCase(options.getTableName())) {
+        className = "codegen_" + className;
+        options.setClassName(className);
+        LOG.info("Will generate java class as " + options.getClassName());
+      }
+    }
+
     CompilationManager compileMgr = new CompilationManager(options);
     ClassWriter classWriter = new ClassWriter(options, manager, tableName,
         compileMgr);

Reply via email to