Updated Branches: refs/heads/trunk 0488503a3 -> 7c5b46fb2
SQOOP-830: HBase import formatting BigDecimal inconsistently (David Robson via Jarek Jarcec Cecho) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/7c5b46fb Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/7c5b46fb Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/7c5b46fb Branch: refs/heads/trunk Commit: 7c5b46fb2860e7401e84542a01a61ef415cbe519 Parents: 0488503 Author: Jarek Jarcec Cecho <[email protected]> Authored: Thu Jan 31 09:26:42 2013 -0800 Committer: Jarek Jarcec Cecho <[email protected]> Committed: Thu Jan 31 09:26:42 2013 -0800 ---------------------------------------------------------------------- src/docs/user/import.txt | 34 +++++ .../org/apache/sqoop/hbase/HBasePutProcessor.java | 7 + .../apache/sqoop/hbase/ToStringPutTransformer.java | 16 ++- .../apache/sqoop/mapreduce/AvroImportMapper.java | 16 ++- .../org/apache/sqoop/mapreduce/ImportJobBase.java | 8 + src/java/org/apache/sqoop/orm/ClassWriter.java | 11 ++ .../cloudera/sqoop/testutil/BaseSqoopTestCase.java | 54 ++++--- .../org/apache/sqoop/TestBigDecimalExport.java | 112 +++++++++++++++ .../org/apache/sqoop/TestBigDecimalImport.java | 86 +++++++++++ 9 files changed, 315 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/docs/user/import.txt ---------------------------------------------------------------------- diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt index 82e74dd..9bc4fc9 100644 --- a/src/docs/user/import.txt +++ b/src/docs/user/import.txt @@ -575,6 +575,40 @@ $ sqoop import --table SomeTable --jar-file mydatatypes.jar \ This command will load the +SomeTableType+ class out of +mydatatypes.jar+. +Additional Import Configuration Properties +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +There are some additional properties which can be configured by modifying ++conf/sqoop-site.xml+. 
Properties can be specified the same as in Hadoop +configuration files, for example: + +---- + <property> + <name>property.name</name> + <value>property.value</value> + </property> +---- + +They can also be specified on the command line in the generic arguments, for +example: + +---- +sqoop import -D property.name=property.value ... +---- + +.Additional import configuration properties: +[grid="all"] +`-------------------------------------`---------------------------------------- +Argument Description +------------------------------------------------------------------------------- ++sqoop.bigdecimal.format.string+ Controls how BigDecimal columns will be \ + formatted when stored as a String. A \ + value of +true+ (default) will use \ + toPlainString to store them without an \ + exponent component (0.0000001); while \ + a value of +false+ will use toString \ + which may include an exponent (1E-7) +------------------------------------------------------------------------------- + Example Invocations ~~~~~~~~~~~~~~~~~~~ http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java index 64a1d18..cca641f 100644 --- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java +++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java @@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.sqoop.mapreduce.ImportJobBase; import com.cloudera.sqoop.lib.FieldMappable; import com.cloudera.sqoop.lib.FieldMapProcessor; @@ -90,6 +91,12 @@ public class HBasePutProcessor implements Closeable, Configurable, this.putTransformer.setColumnFamily(conf.get(COL_FAMILY_KEY, null)); 
this.putTransformer.setRowKeyColumn(conf.get(ROW_KEY_COLUMN_KEY, null)); + if (this.putTransformer instanceof ToStringPutTransformer) { + ((ToStringPutTransformer) this.putTransformer).bigDecimalFormatString = + conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT, + ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT); + } + this.tableName = conf.get(TABLE_NAME_KEY, null); try { this.table = new HTable(conf, this.tableName); http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java index 1f52ba9..131fd43 100644 --- a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java +++ b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java @@ -19,6 +19,7 @@ package org.apache.sqoop.hbase; import java.io.IOException; +import java.math.BigDecimal; import java.util.Collections; import java.util.List; import java.util.Map; @@ -42,6 +43,7 @@ public class ToStringPutTransformer extends PutTransformer { // A mapping from field name -> bytes for that field name. // Used to cache serialization work done for fields names. 
private Map<String, byte[]> serializedFieldNames; + protected boolean bigDecimalFormatString; public ToStringPutTransformer() { serializedFieldNames = new TreeMap<String, byte[]>(); @@ -81,7 +83,7 @@ public class ToStringPutTransformer extends PutTransformer { return null; } - Put put = new Put(Bytes.toBytes(rowKey.toString())); + Put put = new Put(Bytes.toBytes(toHBaseString(rowKey))); for (Map.Entry<String, Object> fieldEntry : fields.entrySet()) { String colName = fieldEntry.getKey(); @@ -91,7 +93,7 @@ public class ToStringPutTransformer extends PutTransformer { Object val = fieldEntry.getValue(); if (null != val) { put.add(colFamilyBytes, getFieldNameBytes(colName), - Bytes.toBytes(val.toString())); + Bytes.toBytes(toHBaseString(val))); } } } @@ -99,4 +101,14 @@ public class ToStringPutTransformer extends PutTransformer { return Collections.singletonList(put); } + private String toHBaseString(Object val) { + String valString; + if (val instanceof BigDecimal && bigDecimalFormatString) { + valString = ((BigDecimal) val).toPlainString(); + } else { + valString = val.toString(); + } + return valString; + } + } http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java b/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java index 30db288..289eb28 100644 --- a/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java +++ b/src/java/org/apache/sqoop/mapreduce/AvroImportMapper.java @@ -30,6 +30,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.mapred.AvroWrapper; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; @@ -51,13 +52,18 @@ public class 
AvroImportMapper new AvroWrapper<GenericRecord>(); private Schema schema; private LargeObjectLoader lobLoader; + private boolean bigDecimalFormatString; @Override protected void setup(Context context) throws IOException, InterruptedException { - schema = AvroJob.getMapOutputSchema(context.getConfiguration()); - lobLoader = new LargeObjectLoader(context.getConfiguration(), + Configuration conf = context.getConfiguration(); + schema = AvroJob.getMapOutputSchema(conf); + lobLoader = new LargeObjectLoader(conf, FileOutputFormat.getWorkOutputPath(context)); + bigDecimalFormatString = conf.getBoolean( + ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT, + ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT); } @Override @@ -99,7 +105,11 @@ public class AvroImportMapper */ private Object toAvro(Object o) { if (o instanceof BigDecimal) { - return o.toString(); + if (bigDecimalFormatString) { + return ((BigDecimal)o).toPlainString(); + } else { + return o.toString(); + } } else if (o instanceof Date) { return ((Date) o).getTime(); } else if (o instanceof Time) { http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java b/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java index f6e2e72..f766532 100644 --- a/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java +++ b/src/java/org/apache/sqoop/mapreduce/ImportJobBase.java @@ -58,6 +58,14 @@ public class ImportJobBase extends JobBase { public static final Log LOG = LogFactory.getLog( ImportJobBase.class.getName()); + /** Controls how java.math.BigDecimal values should be converted to Strings + * If set to true (default) then will call toPlainString() method. + * If set to false then will call toString() method. 
+ */ + public static final String PROPERTY_BIGDECIMAL_FORMAT = + "sqoop.bigdecimal.format.string"; + public static final boolean PROPERTY_BIGDECIMAL_FORMAT_DEFAULT = true; + public ImportJobBase() { this(null); } http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/java/org/apache/sqoop/orm/ClassWriter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java index 126b406..136982c 100644 --- a/src/java/org/apache/sqoop/orm/ClassWriter.java +++ b/src/java/org/apache/sqoop/orm/ClassWriter.java @@ -33,6 +33,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.BytesWritable; +import org.apache.sqoop.mapreduce.ImportJobBase; import com.cloudera.sqoop.SqoopOptions; import com.cloudera.sqoop.manager.ConnManager; @@ -116,6 +117,7 @@ public class ClassWriter { private ConnManager connManager; private String tableName; private CompilationManager compileManager; + private boolean bigDecimalFormatString; /** * Creates a new ClassWriter to generate an ORM class for a table @@ -131,6 +133,9 @@ public class ClassWriter { this.connManager = connMgr; this.tableName = table; this.compileManager = compMgr; + this.bigDecimalFormatString = this.options.getConf().getBoolean( + ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT, + ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT); } /** @@ -317,6 +322,12 @@ public class ClassWriter { String r = colName + "==null?\"" + this.options.getNullStringValue() + "\":" + colName; return r; + } else if (javaType.equals("java.math.BigDecimal") + && this.bigDecimalFormatString) { + // Use toPlainString method for BigDecimals if option is set + String r = colName + "==null?\"" + this.options.getNullNonStringValue() + + "\":" + colName + ".toPlainString()"; + return r; } else { // This is an object type -- just call its toString() in a 
null-safe way. // Also check if it is null, and instead write the null representation http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java ---------------------------------------------------------------------- diff --git a/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java b/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java index f9370c4..cf41b96 100644 --- a/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java +++ b/src/test/com/cloudera/sqoop/testutil/BaseSqoopTestCase.java @@ -312,13 +312,9 @@ public abstract class BaseSqoopTestCase extends TestCase { for (int i = 0; i < colTypes.length; i++) { String colName = BASE_COL_NAME + Integer.toString(i); columnDefStr += colName + " " + colTypes[i]; - columnListStr += colName; - valueListStr += vals[i]; myColNames[i] = colName; if (i < colTypes.length - 1) { columnDefStr += ", "; - columnListStr += ", "; - valueListStr += ", "; } } @@ -344,27 +340,37 @@ public abstract class BaseSqoopTestCase extends TestCase { } } - try { - String insertValsStr = "INSERT INTO " + getTableName() - + "(" + columnListStr + ")" - + " VALUES(" + valueListStr + ")"; - LOG.info("Inserting values: " + insertValsStr); - statement = conn.prepareStatement( - insertValsStr, - ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - statement.executeUpdate(); - } catch (SQLException sqlException) { - fail("Could not create table: " - + StringUtils.stringifyException(sqlException)); - } finally { - if (null != statement) { - try { - statement.close(); - } catch (SQLException se) { - // Ignore exception on close. 
+ if (vals!=null) { + for (int i = 0; i < colTypes.length; i++) { + columnListStr += myColNames[i]; + valueListStr += vals[i]; + if (i < colTypes.length - 1) { + columnListStr += ", "; + valueListStr += ", "; + } + } + try { + String insertValsStr = "INSERT INTO " + getTableName() + + "(" + columnListStr + ")" + + " VALUES(" + valueListStr + ")"; + LOG.info("Inserting values: " + insertValsStr); + statement = conn.prepareStatement( + insertValsStr, + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + statement.executeUpdate(); + } catch (SQLException sqlException) { + fail("Could not create table: " + + StringUtils.stringifyException(sqlException)); + } finally { + if (null != statement) { + try { + statement.close(); + } catch (SQLException se) { + // Ignore exception on close. + } + + statement = null; } - - statement = null; } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/org/apache/sqoop/TestBigDecimalExport.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/TestBigDecimalExport.java b/src/test/org/apache/sqoop/TestBigDecimalExport.java new file mode 100644 index 0000000..80cdad5 --- /dev/null +++ b/src/test/org/apache/sqoop/TestBigDecimalExport.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop; + +import java.io.BufferedWriter; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.math.BigDecimal; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.cloudera.sqoop.testutil.CommonArgs; +import com.cloudera.sqoop.testutil.ExportJobTestCase; + +/** + * Test exporting lines that are created via both options of + * sqoop.bigdecimal.format.string parameter. + */ +public class TestBigDecimalExport extends ExportJobTestCase { + + private void runBigDecimalExport(String line) + throws IOException, SQLException { + FileSystem fs = FileSystem.get(getConf()); + Path tablePath = getTablePath(); + fs.mkdirs(tablePath); + Path filePath = getDataFilePath(); + DataOutputStream stream = fs.create(filePath); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream)); + writer.write(line); + writer.close(); + String[] types = + { "DECIMAL", "NUMERIC" }; + createTableWithColTypes(types, null); + + List<String> args = new ArrayList<String>(); + + CommonArgs.addHadoopFlags(args); + + args.add("--table"); + args.add(getTableName()); + args.add("--export-dir"); + args.add(tablePath.toString()); + args.add("--connect"); + args.add(getConnectString()); + args.add("-m"); + args.add("1"); + + runExport(args.toArray(new String[args.size()])); + + BigDecimal actual1 = null; + BigDecimal actual2 = null; + + Connection conn = getConnection(); + try { + PreparedStatement stmt = conn.prepareStatement("SELECT * FROM " + + getTableName()); + try { + ResultSet rs = stmt.executeQuery(); + try { + rs.next(); + actual1 = rs.getBigDecimal(1); + actual2 = 
rs.getBigDecimal(2); + } finally { + rs.close(); + } + } finally { + stmt.close(); + } + } finally { + conn.close(); + } + + BigDecimal expected1 = new BigDecimal("0.000001"); + BigDecimal expected2 = new BigDecimal("0.0000001"); + + assertEquals(expected1, actual1); + assertEquals(expected2, actual2); + } + + public void testBigDecimalDefault() throws IOException, SQLException { + runBigDecimalExport("0.000001,0.0000001"); + } + + public void testBigDecimalNoFormat() throws IOException, SQLException { + runBigDecimalExport("0.000001,1E-7"); + } + +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/7c5b46fb/src/test/org/apache/sqoop/TestBigDecimalImport.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/TestBigDecimalImport.java b/src/test/org/apache/sqoop/TestBigDecimalImport.java new file mode 100644 index 0000000..76e4704 --- /dev/null +++ b/src/test/org/apache/sqoop/TestBigDecimalImport.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.cloudera.sqoop.testutil.CommonArgs; +import com.cloudera.sqoop.testutil.ImportJobTestCase; + +/** + * Test the sqoop.bigdecimal.format.string parameter default behavior and when + * set to false. + */ +public class TestBigDecimalImport extends ImportJobTestCase { + + private String runBigDecimalImport(List<String> extraArgs) + throws IOException { + String[] types = + { "DECIMAL", "NUMERIC" }; + String[] vals = { "0.000001", "0.0000001" }; + createTableWithColTypes(types, vals); + List<String> args = new ArrayList<String>(); + + CommonArgs.addHadoopFlags(args); + + if (extraArgs!=null) { + args.addAll(extraArgs); + } + args.add("--table"); + args.add(getTableName()); + args.add("--warehouse-dir"); + args.add(getWarehouseDir()); + args.add("--connect"); + args.add(getConnectString()); + args.add("-m"); + args.add("1"); + + runImport(args.toArray(new String[args.size()])); + + Path outputFile = getDataFilePath(); + FileSystem fs = FileSystem.get(getConf()); + DataInputStream stream = fs.open(outputFile); + BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); + String line = reader.readLine(); + reader.close(); + return line; + } + + public void testBigDecimalDefault() throws IOException { + String line = runBigDecimalImport(null); + assertEquals("0.000001,0.0000001", line); + } + + public void testBigDecimalNoFormat() throws IOException { + List<String> args = new ArrayList<String>(); + args.add("-Dsqoop.bigdecimal.format.string=false"); + + String line = runBigDecimalImport(args); + assertEquals("0.000001,1E-7", line); + } + +}
