Updated Branches: refs/heads/sqoop2 b00d13ac9 -> cf3d71049
SQOOP-735 Introduce output format option to Sqoop import (Jarek Jarcec Cecho) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/cf3d7104 Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/cf3d7104 Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/cf3d7104 Branch: refs/heads/sqoop2 Commit: cf3d71049d1909c4bf7c7bcd664f92f028f91e38 Parents: 8a5cd67 Author: Bilung Lee <[email protected]> Authored: Mon Dec 3 18:03:31 2012 -0800 Committer: Bilung Lee <[email protected]> Committed: Mon Dec 3 18:03:31 2012 -0800 ---------------------------------------------------------------------- .../sqoop/framework/configuration/OutputForm.java | 2 + .../framework/configuration/OutputFormat.java | 33 +++++++++++++++ .../main/resources/framework-resources.properties | 3 + .../mapreduce/MapreduceExecutionEngine.java | 17 +++++++- .../apache/sqoop/job/MapreduceExecutionError.java | 3 + 5 files changed, 57 insertions(+), 1 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/cf3d7104/core/src/main/java/org/apache/sqoop/framework/configuration/OutputForm.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/sqoop/framework/configuration/OutputForm.java b/core/src/main/java/org/apache/sqoop/framework/configuration/OutputForm.java index 3f70b5f..1b2cec7 100644 --- a/core/src/main/java/org/apache/sqoop/framework/configuration/OutputForm.java +++ b/core/src/main/java/org/apache/sqoop/framework/configuration/OutputForm.java @@ -28,5 +28,7 @@ public class OutputForm { @Input public StorageType storageType; + @Input public OutputFormat outputFormat; + @Input(size = 25) public String outputDirectory; } http://git-wip-us.apache.org/repos/asf/sqoop/blob/cf3d7104/core/src/main/java/org/apache/sqoop/framework/configuration/OutputFormat.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/sqoop/framework/configuration/OutputFormat.java b/core/src/main/java/org/apache/sqoop/framework/configuration/OutputFormat.java new file mode 100644 index 0000000..4cd3589 --- /dev/null +++ b/core/src/main/java/org/apache/sqoop/framework/configuration/OutputFormat.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.framework.configuration; + +/** + * Various supported formats on disk + */ +public enum OutputFormat { + /** + * Comma separated text file + */ + TEXT_FILE, + + /** + * Sequence file + */ + SEQUENCE_FILE, +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/cf3d7104/core/src/main/resources/framework-resources.properties ---------------------------------------------------------------------- diff --git a/core/src/main/resources/framework-resources.properties b/core/src/main/resources/framework-resources.properties index 5ddf32d..019f5ca 100644 --- a/core/src/main/resources/framework-resources.properties +++ b/core/src/main/resources/framework-resources.properties @@ -35,6 +35,9 @@ output.help = You must supply the information requested in order to \ output.storageType.label = Storage type output.storageType.help = Target on Hadoop ecosystem where to store data +output.outputFormat.label = Output format +output.outputFormat.help = Format in which data should be serialized + output.outputDirectory.label = Output directory output.outputDirectory.help = Output directory for final data http://git-wip-us.apache.org/repos/asf/sqoop/blob/cf3d7104/execution/mapreduce/src/main/java/org/apache/sqoop/execution/mapreduce/MapreduceExecutionEngine.java ---------------------------------------------------------------------- diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/execution/mapreduce/MapreduceExecutionEngine.java b/execution/mapreduce/src/main/java/org/apache/sqoop/execution/mapreduce/MapreduceExecutionEngine.java index 4a5b305..3248e77 100644 --- a/execution/mapreduce/src/main/java/org/apache/sqoop/execution/mapreduce/MapreduceExecutionEngine.java +++ b/execution/mapreduce/src/main/java/org/apache/sqoop/execution/mapreduce/MapreduceExecutionEngine.java @@ -19,9 +19,14 @@ package org.apache.sqoop.execution.mapreduce; import org.apache.hadoop.io.NullWritable; import org.apache.sqoop.common.MutableMapContext; +import org.apache.sqoop.common.SqoopException; import org.apache.sqoop.framework.ExecutionEngine; import org.apache.sqoop.framework.SubmissionRequest; +import org.apache.sqoop.framework.configuration.ImportJobConfiguration; +import org.apache.sqoop.framework.configuration.OutputFormat; import org.apache.sqoop.job.JobConstants; +import org.apache.sqoop.job.MapreduceExecutionError; +import org.apache.sqoop.job.etl.HdfsSequenceImportLoader; import org.apache.sqoop.job.etl.HdfsTextImportLoader; import org.apache.sqoop.job.etl.Importer; import org.apache.sqoop.job.io.Data; @@ -42,6 +47,7 @@ public class MapreduceExecutionEngine extends ExecutionEngine { @Override public void prepareImportSubmission(SubmissionRequest gRequest) { MRSubmissionRequest request = (MRSubmissionRequest) gRequest; + ImportJobConfiguration jobConf = (ImportJobConfiguration) request.getConfigFrameworkJob(); // Configure map-reduce classes for import request.setInputFormatClass(SqoopInputFormat.class); @@ -61,6 +67,15 @@ public class MapreduceExecutionEngine extends ExecutionEngine { context.setString(JobConstants.JOB_ETL_PARTITIONER, importer.getPartitioner().getName()); context.setString(JobConstants.JOB_ETL_EXTRACTOR, importer.getExtractor().getName()); context.setString(JobConstants.JOB_ETL_DESTROYER, importer.getDestroyer().getName()); - context.setString(JobConstants.JOB_ETL_LOADER, HdfsTextImportLoader.class.getName()); + + // TODO: This settings should be abstracted to core module at some point + if(jobConf.output.outputFormat == OutputFormat.TEXT_FILE) { + context.setString(JobConstants.JOB_ETL_LOADER, HdfsTextImportLoader.class.getName()); + } else if(jobConf.output.outputFormat == OutputFormat.SEQUENCE_FILE) { + context.setString(JobConstants.JOB_ETL_LOADER, HdfsSequenceImportLoader.class.getName()); + } else { + throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0024, + "Format: " + jobConf.output.outputFormat); + } } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/cf3d7104/execution/mapreduce/src/main/java/org/apache/sqoop/job/MapreduceExecutionError.java ---------------------------------------------------------------------- diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/MapreduceExecutionError.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/MapreduceExecutionError.java index aa1f733..30956f3 100644 --- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/MapreduceExecutionError.java +++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/MapreduceExecutionError.java @@ -73,6 +73,9 @@ public enum MapreduceExecutionError implements ErrorCode { /** Unknown job type */ MAPRED_EXEC_0023("Unknown job type"), + /** Unsupported output format type found **/ + MAPRED_EXEC_0024("Unknown output format type"), + ; private final String message;
