Repository: sqoop Updated Branches: refs/heads/trunk 6dd6a4fc8 -> 00a02dec2
SQOOP-3382: Add parquet numeric support for Parquet in hdfs import (Fero Szabo via Szabolcs Vasas) Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/00a02dec Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/00a02dec Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/00a02dec Branch: refs/heads/trunk Commit: 00a02dec2f7507f813ee4899096c470ba1112a9e Parents: 6dd6a4f Author: Szabolcs Vasas <[email protected]> Authored: Wed Nov 14 11:29:02 2018 +0100 Committer: Szabolcs Vasas <[email protected]> Committed: Wed Nov 14 11:29:02 2018 +0100 ---------------------------------------------------------------------- .../sqoop/config/ConfigurationConstants.java | 5 + .../sqoop/mapreduce/ParquetImportMapper.java | 8 +- .../HadoopParquetImportJobConfigurator.java | 14 + .../apache/sqoop/orm/AvroSchemaGenerator.java | 20 +- .../importjob/ImportJobTestConfiguration.java | 32 -- .../sqoop/importjob/NumericTypesImportTest.java | 327 +++++++++++++++++++ .../sqoop/importjob/SplitByImportTest.java | 8 +- .../avro/AvroImportForNumericTypesTest.java | 210 ------------ .../MSSQLServerImportJobTestConfiguration.java | 62 ---- .../MySQLImportJobTestConfiguration.java | 63 ---- .../OracleImportJobTestConfiguration.java | 64 ---- ...acleImportJobTestConfigurationForNumber.java | 65 ---- ...sqlImportJobTestConfigurationForNumeric.java | 64 ---- ...obTestConfigurationPaddingShouldSucceed.java | 62 ---- .../configuration/AvroTestConfiguration.java | 24 ++ ...enericImportJobSplitByTestConfiguration.java | 5 +- .../ImportJobTestConfiguration.java | 30 ++ .../MSSQLServerImportJobTestConfiguration.java | 68 ++++ .../MySQLImportJobTestConfiguration.java | 68 ++++ .../OracleImportJobTestConfiguration.java | 70 ++++ ...acleImportJobTestConfigurationForNumber.java | 71 ++++ .../configuration/ParquetTestConfiguration.java | 24 ++ ...sqlImportJobTestConfigurationForNumeric.java | 70 ++++ ...obTestConfigurationPaddingShouldSucceed.java | 68 ++++ .../org/apache/sqoop/util/ParquetReader.java | 44 ++- 25 files changed, 900 insertions(+), 646 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/java/org/apache/sqoop/config/ConfigurationConstants.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/config/ConfigurationConstants.java b/src/java/org/apache/sqoop/config/ConfigurationConstants.java index 3724f25..7592846 100644 --- a/src/java/org/apache/sqoop/config/ConfigurationConstants.java +++ b/src/java/org/apache/sqoop/config/ConfigurationConstants.java @@ -97,6 +97,11 @@ public final class ConfigurationConstants { public static final String PROP_ENABLE_AVRO_LOGICAL_TYPE_DECIMAL = "sqoop.avro.logical_types.decimal.enable"; /** + * Enable parquet logical types (decimal support only). + */ + public static final String PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL = "sqoop.parquet.logical_types.decimal.enable"; + + /** * Default precision for avro schema */ public static final String PROP_AVRO_DECIMAL_PRECISION = "sqoop.avro.logical_types.decimal.default.precision"; http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/java/org/apache/sqoop/mapreduce/ParquetImportMapper.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/ParquetImportMapper.java b/src/java/org/apache/sqoop/mapreduce/ParquetImportMapper.java index 62334f8..b386079 100644 --- a/src/java/org/apache/sqoop/mapreduce/ParquetImportMapper.java +++ b/src/java/org/apache/sqoop/mapreduce/ParquetImportMapper.java @@ -18,6 +18,9 @@ package org.apache.sqoop.mapreduce; +import org.apache.avro.Conversions; +import org.apache.avro.generic.GenericData; +import org.apache.sqoop.config.ConfigurationConstants; import org.apache.sqoop.lib.LargeObjectLoader; import org.apache.sqoop.lib.SqoopRecord; import org.apache.avro.Schema; @@ -39,6 +42,7 @@ public abstract class ParquetImportMapper<KEYOUT, VALOUT> private Schema schema = null; private boolean bigDecimalFormatString = true; private LargeObjectLoader lobLoader = null; + private boolean bigDecimalPadding; @Override protected void setup(Context context) @@ -49,6 +53,8 @@ public abstract class ParquetImportMapper<KEYOUT, VALOUT> ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT, ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT); lobLoader = createLobLoader(context); + GenericData.get().addLogicalTypeConversion(new Conversions.DecimalConversion()); + bigDecimalPadding = conf.getBoolean(ConfigurationConstants.PROP_ENABLE_AVRO_DECIMAL_PADDING, false); } @Override @@ -62,7 +68,7 @@ public abstract class ParquetImportMapper<KEYOUT, VALOUT> } GenericRecord record = AvroUtil.toGenericRecord(val.getFieldMap(), schema, - bigDecimalFormatString); + bigDecimalFormatString, bigDecimalPadding); write(context, record); } http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/java/org/apache/sqoop/mapreduce/parquet/hadoop/HadoopParquetImportJobConfigurator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/mapreduce/parquet/hadoop/HadoopParquetImportJobConfigurator.java b/src/java/org/apache/sqoop/mapreduce/parquet/hadoop/HadoopParquetImportJobConfigurator.java index e821543..aa9740b 100644 --- a/src/java/org/apache/sqoop/mapreduce/parquet/hadoop/HadoopParquetImportJobConfigurator.java +++ b/src/java/org/apache/sqoop/mapreduce/parquet/hadoop/HadoopParquetImportJobConfigurator.java @@ -25,7 +25,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.parquet.avro.GenericDataSupplier; import org.apache.sqoop.SqoopOptions; +import org.apache.sqoop.config.ConfigurationConstants; import org.apache.sqoop.mapreduce.parquet.ParquetImportJobConfigurator; import org.apache.parquet.avro.AvroParquetOutputFormat; import org.apache.parquet.hadoop.ParquetOutputFormat; @@ -46,6 +48,18 @@ public class HadoopParquetImportJobConfigurator implements ParquetImportJobConfi public void configureMapper(Job job, Schema schema, SqoopOptions options, String tableName, Path destination) throws IOException { configureAvroSchema(job, schema); configureOutputCodec(job); + configureLogicalTypeSupport(job, options); + } + + /** + * Configurations needed for logical types, i.e. decimal in parquet. + * @param job + * @param options + */ + private void configureLogicalTypeSupport(Job job, SqoopOptions options) { + if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)) { + AvroParquetOutputFormat.setAvroDataSupplier(job, GenericDataSupplier.class); + } } @Override http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java index 7a2a5f9..05ac46c 100644 --- a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java +++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java @@ -39,6 +39,9 @@ import org.apache.sqoop.avro.AvroUtil; import org.apache.sqoop.config.ConfigurationConstants; import org.codehaus.jackson.node.NullNode; +import static org.apache.sqoop.SqoopOptions.FileLayout.AvroDataFile; +import static org.apache.sqoop.SqoopOptions.FileLayout.ParquetFile; + /** * Creates an Avro schema to represent a table from a database. */ @@ -126,8 +129,7 @@ public class AvroSchemaGenerator { public Schema toAvroSchema(int sqlType, String columnName, Integer precision, Integer scale) { List<Schema> childSchemas = new ArrayList<Schema>(); childSchemas.add(Schema.create(Schema.Type.NULL)); - if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_AVRO_LOGICAL_TYPE_DECIMAL, false) - && isLogicalType(sqlType)) { + if (isLogicalTypeConversionEnabled() && isLogicalType(sqlType)) { childSchemas.add( toAvroLogicalType(columnName, sqlType, precision, scale) .addToSchema(Schema.create(Type.BYTES)) @@ -138,6 +140,20 @@ public class AvroSchemaGenerator { return Schema.createUnion(childSchemas); } + /** + * @return True if this is a parquet import and parquet logical types are enabled, + * or if this is an avro import and avro logical types are enabled. False otherwise. + */ + private boolean isLogicalTypeConversionEnabled() { + if (ParquetFile.equals(options.getFileLayout())) { + return options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false); + } + else if (AvroDataFile.equals(options.getFileLayout())) { + return options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_AVRO_LOGICAL_TYPE_DECIMAL, false); + } + return false; + } + public Schema toAvroSchema(int sqlType) { return toAvroSchema(sqlType, null, null, null); } http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/ImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/ImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/ImportJobTestConfiguration.java deleted file mode 100644 index 14de910..0000000 --- a/src/test/org/apache/sqoop/importjob/ImportJobTestConfiguration.java +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob; - -import java.util.List; - -public interface ImportJobTestConfiguration { - - String[] getTypes(); - - String[] getNames(); - - List<String[]> getSampleData(); - - String[] getExpectedResults(); -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/NumericTypesImportTest.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/NumericTypesImportTest.java b/src/test/org/apache/sqoop/importjob/NumericTypesImportTest.java new file mode 100644 index 0000000..0714441 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/NumericTypesImportTest.java @@ -0,0 +1,327 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.OriginalType; +import org.apache.sqoop.SqoopOptions; +import org.apache.sqoop.importjob.configuration.AvroTestConfiguration; +import org.apache.sqoop.importjob.configuration.MSSQLServerImportJobTestConfiguration; +import org.apache.sqoop.importjob.configuration.MySQLImportJobTestConfiguration; +import org.apache.sqoop.importjob.configuration.OracleImportJobTestConfiguration; +import org.apache.sqoop.importjob.configuration.ParquetTestConfiguration; +import org.apache.sqoop.importjob.configuration.PostgresqlImportJobTestConfigurationForNumeric; +import org.apache.sqoop.testutil.ArgumentArrayBuilder; +import org.apache.sqoop.testutil.AvroTestUtils; +import org.apache.sqoop.testutil.ImportJobTestCase; +import org.apache.sqoop.testutil.adapter.DatabaseAdapter; +import org.apache.sqoop.testutil.adapter.MSSQLServerDatabaseAdapter; +import org.apache.sqoop.testutil.adapter.MySqlDatabaseAdapter; +import org.apache.sqoop.testutil.adapter.OracleDatabaseAdapter; +import org.apache.sqoop.testutil.adapter.PostgresDatabaseAdapter; +import org.apache.sqoop.importjob.configuration.OracleImportJobTestConfigurationForNumber; +import org.apache.sqoop.importjob.configuration.PostgresqlImportJobTestConfigurationPaddingShouldSucceed; +import org.apache.sqoop.util.ParquetReader; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.apache.sqoop.SqoopOptions.FileLayout.AvroDataFile; +import static org.apache.sqoop.SqoopOptions.FileLayout.ParquetFile; + +@RunWith(Parameterized.class) +/** + * This test covers the behavior of the Avro import for fixed point decimal types, i.e. NUMBER, NUMERIC + * and DECIMAL. + * + * Oracle and Postgres store numbers without padding, while other DBs store them padded with 0s. + * + * The features tested here affect two phases in Sqoop: + * 1. Avro schema generation during avro and parquet import + * Default precision and scale are used here to avoid issues with Oracle and Postgres, as these + * don't return valid precision and scale if they weren't specified in the table DDL. + * + * 2. Decimal padding during avro or parquet import + * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors. + */ +public class NumericTypesImportTest<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase { + + public static final Log LOG = LogFactory.getLog(NumericTypesImportTest.class.getName()); + + private Configuration conf = new Configuration(); + + private final T configuration; + private final DatabaseAdapter adapter; + private final boolean failWithoutExtraArgs; + private final boolean failWithPadding; + + // Constants for the basic test case, that doesn't use extra arguments + // that are required to avoid errors, i.e. padding and default precision and scale. + private final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false; + private final static boolean FAIL_WITHOUT_EXTRA_ARGS = true; + + // Constants for the test case that has padding specified but not default precision and scale. + private final static boolean SUCCEED_WITH_PADDING_ONLY = false; + private final static boolean FAIL_WITH_PADDING_ONLY = true; + private Path tableDirPath; + + @Parameters(name = "Adapter: {0}| Config: {1}| failWithoutExtraArgs: {2}| failWithPadding: {3}") + public static Iterable<? extends Object> testConfigurations() { + DatabaseAdapter postgresAdapter = new PostgresDatabaseAdapter(); + OracleDatabaseAdapter oracleDatabaseAdapter = new OracleDatabaseAdapter(); + return Arrays.asList( + new Object[] {oracleDatabaseAdapter, new OracleImportJobTestConfigurationForNumber(), FAIL_WITHOUT_EXTRA_ARGS, FAIL_WITH_PADDING_ONLY}, + new Object[] {oracleDatabaseAdapter, new OracleImportJobTestConfiguration(), FAIL_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY}, + new Object[] { new MySqlDatabaseAdapter(), new MySQLImportJobTestConfiguration(), SUCCEED_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY}, + new Object[] { new MSSQLServerDatabaseAdapter(), new MSSQLServerImportJobTestConfiguration(), SUCCEED_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY}, + new Object[] { postgresAdapter, new PostgresqlImportJobTestConfigurationForNumeric(), FAIL_WITHOUT_EXTRA_ARGS, FAIL_WITH_PADDING_ONLY}, + new Object[] { postgresAdapter, new PostgresqlImportJobTestConfigurationPaddingShouldSucceed(), SUCCEED_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY} + ); + } + + public NumericTypesImportTest(DatabaseAdapter adapter, T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) { + this.adapter = adapter; + this.configuration = configuration; + this.failWithoutExtraArgs = failWithoutExtraArgs; + this.failWithPadding = failWithPaddingOnly; + } + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Override + protected Configuration getConf() { + return conf; + } + + @Override + protected boolean useHsqldbTestServer() { + return false; + } + + @Override + protected String getConnectString() { + return adapter.getConnectionString(); + } + + @Override + protected SqoopOptions getSqoopOptions(Configuration conf) { + SqoopOptions opts = new SqoopOptions(conf); + adapter.injectConnectionParameters(opts); + return opts; + } + + @Override + protected void dropTableIfExists(String table) throws SQLException { + adapter.dropTableIfExists(table, getManager()); + } + + @Before + public void setUp() { + super.setUp(); + String[] names = configuration.getNames(); + String[] types = configuration.getTypes(); + createTableWithColTypesAndNames(names, types, new String[0]); + List<String[]> inputData = configuration.getSampleData(); + for (String[] input : inputData) { + insertIntoTable(names, types, input); + } + tableDirPath = new Path(getWarehouseDir() + "/" + getTableName()); + } + + @After + public void tearDown() { + try { + dropTableIfExists(getTableName()); + } catch (SQLException e) { + LOG.warn("Error trying to drop table on tearDown: " + e); + } + super.tearDown(); + } + + private ArgumentArrayBuilder getArgsBuilder(SqoopOptions.FileLayout fileLayout) { + ArgumentArrayBuilder builder = new ArgumentArrayBuilder(); + if (AvroDataFile.equals(fileLayout)) { + builder.withOption("as-avrodatafile"); + } + else if (ParquetFile.equals(fileLayout)) { + builder.withOption("as-parquetfile"); + } + + return builder.withCommonHadoopFlags(true) + .withOption("warehouse-dir", getWarehouseDir()) + .withOption("num-mappers", "1") + .withOption("table", getTableName()) + .withOption("connect", getConnectString()); + } + + /** + * Adds properties to the given arg builder for decimal precision and scale. + * @param builder + */ + private void addPrecisionAndScale(ArgumentArrayBuilder builder) { + builder.withProperty("sqoop.avro.logical_types.decimal.default.precision", "38"); + builder.withProperty("sqoop.avro.logical_types.decimal.default.scale", "3"); + } + + /** + * Enables padding for decimals in avro and parquet import. + * @param builder + */ + private void addPadding(ArgumentArrayBuilder builder) { + builder.withProperty("sqoop.avro.decimal_padding.enable", "true"); + } + + private void addEnableAvroDecimal(ArgumentArrayBuilder builder) { + builder.withProperty("sqoop.avro.logical_types.decimal.enable", "true"); + } + + private void addEnableParquetDecimal(ArgumentArrayBuilder builder) { + builder.withProperty("sqoop.parquet.logical_types.decimal.enable", "true"); + } + + private void configureJunitToExpectFailure(boolean failWithPadding) { + if (failWithPadding) { + thrown.expect(IOException.class); + thrown.expectMessage("Failure during job; return status 1"); + } + } + + @Test + public void testAvroImportWithoutPadding() throws IOException { + configureJunitToExpectFailure(failWithoutExtraArgs); + ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile); + addEnableAvroDecimal(builder); + String[] args = builder.build(); + runImport(args); + if (!failWithoutExtraArgs) { + verify(AvroDataFile); + } + } + + @Test + public void testAvroImportWithPadding() throws IOException { + configureJunitToExpectFailure(failWithPadding); + ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile); + addEnableAvroDecimal(builder); + addPadding(builder); + runImport(builder.build()); + if (!failWithPadding) { + verify(AvroDataFile); + } + } + + @Test + public void testAvroImportWithDefaultPrecisionAndScale() throws IOException { + ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile); + addEnableAvroDecimal(builder); + addPadding(builder); + addPrecisionAndScale(builder); + runImport(builder.build()); + verify(AvroDataFile); + } + + @Test + public void testParquetImportWithoutPadding() throws IOException { + configureJunitToExpectFailure(failWithoutExtraArgs); + ArgumentArrayBuilder builder = getArgsBuilder(ParquetFile); + addEnableParquetDecimal(builder); + String[] args = builder.build(); + runImport(args); + if (!failWithoutExtraArgs) { + verify(ParquetFile); + } + } + + @Test + public void testParquetImportWithPadding() throws IOException { + configureJunitToExpectFailure(failWithPadding); + ArgumentArrayBuilder builder = getArgsBuilder(ParquetFile); + addEnableParquetDecimal(builder); + addPadding(builder); + runImport(builder.build()); + if (!failWithPadding) { + verify(ParquetFile); + } + } + + @Test + public void testParquetImportWithDefaultPrecisionAndScale() throws IOException { + ArgumentArrayBuilder builder = getArgsBuilder(ParquetFile); + addEnableParquetDecimal(builder); + addPadding(builder); + addPrecisionAndScale(builder); + runImport(builder.build()); + verify(ParquetFile); + } + + private void verify(SqoopOptions.FileLayout fileLayout) { + if (AvroDataFile.equals(fileLayout)) { + AvroTestUtils.registerDecimalConversionUsageForVerification(); + AvroTestUtils.verify(configuration.getExpectedResultsForAvro(), getConf(), getTablePath()); + } else if (ParquetFile.equals(fileLayout)) { + verifyParquetFile(); + } + } + + private void verifyParquetFile() { + verifyParquetSchema(); + verifyParquetContent(); + } + + private void verifyParquetContent() { + ParquetReader reader = new ParquetReader(tableDirPath); + assertEquals(Arrays.asList(configuration.getExpectedResultsForParquet()), reader.readAllInCsvSorted()); + } + + private void verifyParquetSchema() { + ParquetReader reader = new ParquetReader(tableDirPath); + MessageType parquetSchema = reader.readParquetSchema(); + + String[] types = configuration.getTypes(); + for (int i = 0; i < types.length; i ++) { + String type = types[i]; + if (isNumericSqlType(type)) { + OriginalType parquetFieldType = parquetSchema.getFields().get(i).getOriginalType(); + assertEquals(OriginalType.DECIMAL, parquetFieldType); + } + } + } + + private boolean isNumericSqlType(String type) { + return type.toUpperCase().startsWith("DECIMAL") + || type.toUpperCase().startsWith("NUMBER") + || type.toUpperCase().startsWith("NUMERIC"); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/SplitByImportTest.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/SplitByImportTest.java b/src/test/org/apache/sqoop/importjob/SplitByImportTest.java index 7977c0b..c6fe4f2 100644 --- a/src/test/org/apache/sqoop/importjob/SplitByImportTest.java +++ b/src/test/org/apache/sqoop/importjob/SplitByImportTest.java @@ -24,6 +24,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.sqoop.SqoopOptions; import org.apache.sqoop.importjob.configuration.GenericImportJobSplitByTestConfiguration; +import org.apache.sqoop.importjob.configuration.ImportJobTestConfiguration; +import org.apache.sqoop.importjob.configuration.ParquetTestConfiguration; import org.apache.sqoop.testutil.ArgumentArrayBuilder; import org.apache.sqoop.testutil.ImportJobTestCase; import org.apache.sqoop.testutil.adapter.DatabaseAdapter; @@ -55,7 +57,7 @@ public class SplitByImportTest extends ImportJobTestCase { private Configuration conf = new Configuration(); - private final ImportJobTestConfiguration configuration; + private final ParquetTestConfiguration configuration; private final DatabaseAdapter adapter; @Parameters(name = "Adapter: {0}| Config: {1}") @@ -69,7 +71,7 @@ public class SplitByImportTest extends ImportJobTestCase { ); } - public SplitByImportTest(DatabaseAdapter adapter, ImportJobTestConfiguration configuration) { + public SplitByImportTest(DatabaseAdapter adapter, ParquetTestConfiguration configuration) { this.adapter = adapter; this.configuration = configuration; } @@ -148,6 +150,6 @@ public class SplitByImportTest extends ImportJobTestCase { private void verifyParquetFile() { ParquetReader reader = new ParquetReader(new Path(getWarehouseDir() + "/" + getTableName()), getConf()); - assertEquals(asList(configuration.getExpectedResults()), reader.readAllInCsvSorted()); + assertEquals(asList(configuration.getExpectedResultsForParquet()), reader.readAllInCsvSorted()); } } http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/AvroImportForNumericTypesTest.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/AvroImportForNumericTypesTest.java b/src/test/org/apache/sqoop/importjob/avro/AvroImportForNumericTypesTest.java deleted file mode 100644 index ff13dc3..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/AvroImportForNumericTypesTest.java +++ /dev/null @@ -1,210 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.sqoop.SqoopOptions; -import org.apache.sqoop.importjob.ImportJobTestConfiguration; -import org.apache.sqoop.testutil.ArgumentArrayBuilder; -import org.apache.sqoop.testutil.AvroTestUtils; -import org.apache.sqoop.testutil.ImportJobTestCase; -import org.apache.sqoop.testutil.adapter.DatabaseAdapter; -import org.apache.sqoop.testutil.adapter.MSSQLServerDatabaseAdapter; -import org.apache.sqoop.testutil.adapter.MySqlDatabaseAdapter; -import org.apache.sqoop.testutil.adapter.OracleDatabaseAdapter; -import org.apache.sqoop.testutil.adapter.PostgresDatabaseAdapter; -import org.apache.sqoop.importjob.avro.configuration.MSSQLServerImportJobTestConfiguration; -import org.apache.sqoop.importjob.avro.configuration.MySQLImportJobTestConfiguration; -import org.apache.sqoop.importjob.avro.configuration.OracleImportJobTestConfigurationForNumber; -import org.apache.sqoop.importjob.avro.configuration.OracleImportJobTestConfiguration; -import org.apache.sqoop.importjob.avro.configuration.PostgresqlImportJobTestConfigurationForNumeric; -import org.apache.sqoop.importjob.avro.configuration.PostgresqlImportJobTestConfigurationPaddingShouldSucceed; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -import java.io.IOException; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.List; - -@RunWith(Parameterized.class) -/** - * This test covers the behavior of the Avro import for fixed point decimal types, i.e. NUMBER, NUMERIC - * and DECIMAL. - * - * Oracle and Postgres store numbers without padding, while other DBs store them padded with 0s. - * - * The features tested here affect two phases in Sqoop: - * 1. Avro schema generation - * Default precision and scale are used here to avoid issues with Oracle and Postgres, as these - * don't return valid precision and scale if they weren't specified in the table DDL. - * - * 2. Avro import: padding. - * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors. - */ -public class AvroImportForNumericTypesTest extends ImportJobTestCase { - - public static final Log LOG = LogFactory.getLog( - AvroImportForNumericTypesTest.class.getName()); - - private Configuration conf = new Configuration(); - - private final ImportJobTestConfiguration configuration; - private final DatabaseAdapter adapter; - private final boolean failWithoutExtraArgs; - private final boolean failWithPadding; - - // Constants for the basic test case, that doesn't use extra arguments - // that are required to avoid errors, i.e. padding and default precision and scale. - private final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false; - private final static boolean FAIL_WITHOUT_EXTRA_ARGS = true; - - // Constants for the test case that has padding specified but not default precision and scale. - private final static boolean SUCCEED_WITH_PADDING_ONLY = false; - private final static boolean FAIL_WITH_PADDING_ONLY = true; - - @Parameters(name = "Adapter: {0}| Config: {1}| failWithoutExtraArgs: {2}| failWithPadding: {3}") - public static Iterable<? extends Object> testConfigurations() { - DatabaseAdapter postgresAdapter = new PostgresDatabaseAdapter(); - OracleDatabaseAdapter oracleDatabaseAdapter = new OracleDatabaseAdapter(); - return Arrays.asList( - new Object[] {oracleDatabaseAdapter, new OracleImportJobTestConfigurationForNumber(), FAIL_WITHOUT_EXTRA_ARGS, FAIL_WITH_PADDING_ONLY}, - new Object[] {oracleDatabaseAdapter, new OracleImportJobTestConfiguration(), FAIL_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY}, - new Object[] { new MySqlDatabaseAdapter(), new MySQLImportJobTestConfiguration(), SUCCEED_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY}, - new Object[] { new MSSQLServerDatabaseAdapter(), new MSSQLServerImportJobTestConfiguration(), SUCCEED_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY}, - new Object[] { postgresAdapter, new PostgresqlImportJobTestConfigurationForNumeric(), FAIL_WITHOUT_EXTRA_ARGS, FAIL_WITH_PADDING_ONLY}, - new Object[] { postgresAdapter, new PostgresqlImportJobTestConfigurationPaddingShouldSucceed(), SUCCEED_WITHOUT_EXTRA_ARGS, SUCCEED_WITH_PADDING_ONLY} - ); - } - - public AvroImportForNumericTypesTest(DatabaseAdapter adapter, ImportJobTestConfiguration configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) { - this.adapter = adapter; - this.configuration = configuration; - this.failWithoutExtraArgs = failWithoutExtraArgs; - this.failWithPadding = failWithPaddingOnly; - } - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Override - protected Configuration getConf() { - return conf; - } - - @Override - protected boolean useHsqldbTestServer() { - return false; - } - - @Override - protected String getConnectString() { - return adapter.getConnectionString(); - } - - @Override - protected SqoopOptions getSqoopOptions(Configuration conf) { - SqoopOptions opts = new SqoopOptions(conf); - adapter.injectConnectionParameters(opts); - return opts; - } - - @Override - protected void dropTableIfExists(String table) throws SQLException { - adapter.dropTableIfExists(table, getManager()); - } - - @Before - public void setUp() { - super.setUp(); - String[] names = configuration.getNames(); - String[] types = configuration.getTypes(); - createTableWithColTypesAndNames(names, types, new String[0]); - List<String[]> inputData = configuration.getSampleData(); - for (String[] input : inputData) { - insertIntoTable(names, types, input); - } - } - - @After - public void tearDown() { - try { - dropTableIfExists(getTableName()); - } catch (SQLException e) { - LOG.warn("Error trying to drop table on tearDown: " + e); - } - super.tearDown(); - } - - private ArgumentArrayBuilder getArgsBuilder() { - ArgumentArrayBuilder builder = AvroTestUtils.getBuilderForAvroPaddingTest(this); - builder.withOption("connect", getConnectString()); - return builder; - } - - @Test - public void testAvroImportWithoutPadding() throws IOException { - if (failWithoutExtraArgs) { - thrown.expect(IOException.class); - thrown.expectMessage("Failure during job; return status 1"); - } - String[] args = getArgsBuilder().build(); - runImport(args); - if (!failWithoutExtraArgs) { - verify(); - } - } - - @Test - public void testAvroImportWithPadding() throws IOException { - if (failWithPadding) { - thrown.expect(IOException.class); - thrown.expectMessage("Failure during job; return status 1"); - } - ArgumentArrayBuilder builder = getArgsBuilder(); - builder.withProperty("sqoop.avro.decimal_padding.enable", "true"); - runImport(builder.build()); - if (!failWithPadding) { - verify(); - } - } - - @Test - public void testAvroImportWithDefaultPrecisionAndScale() throws IOException { - ArgumentArrayBuilder builder = getArgsBuilder(); - builder.withProperty("sqoop.avro.decimal_padding.enable", "true"); - builder.withProperty("sqoop.avro.logical_types.decimal.default.precision", "38"); - builder.withProperty("sqoop.avro.logical_types.decimal.default.scale", "3"); - runImport(builder.build()); - verify(); - } - - private void verify() { - AvroTestUtils.registerDecimalConversionUsageForVerification(); - AvroTestUtils.verify(configuration.getExpectedResults(), getConf(), getTablePath()); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/configuration/MSSQLServerImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/configuration/MSSQLServerImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/avro/configuration/MSSQLServerImportJobTestConfiguration.java deleted file mode 100644 index 182d296..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/configuration/MSSQLServerImportJobTestConfiguration.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro.configuration; - -import org.apache.sqoop.importjob.ImportJobTestConfiguration; - -import java.util.ArrayList; -import java.util.List; - -public class MSSQLServerImportJobTestConfiguration implements ImportJobTestConfiguration { - - @Override - public String[] getTypes() { - String[] columnTypes = {"INT", "NUMERIC", "NUMERIC(20)", "NUMERIC(20,5)", "NUMERIC(20,0)", "NUMERIC(38,5)", - "DECIMAL", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,0)", "DECIMAL(38,5)"}; - return columnTypes; - } - - @Override - public String[] getNames() { - String[] columnNames = {"ID", "N1", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; - return columnNames; - } - - @Override - public List<String[]> getSampleData() { - List<String[]> inputData = new ArrayList<>(); - inputData.add(new String[]{"1", "100.050", "1000000.05", "1000000.05", "1000000.05", "1000000.05", - "100.060", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); - return inputData; - } - - @Override - public String[] getExpectedResults() { - String expectedRecord = "{\"ID\": 1, \"N1\": 100, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + - "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; - String[] expectedResult = new String[1]; - expectedResult[0] = expectedRecord; - return expectedResult; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/configuration/MySQLImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/configuration/MySQLImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/avro/configuration/MySQLImportJobTestConfiguration.java deleted file mode 100644 index e9bf991..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/configuration/MySQLImportJobTestConfiguration.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro.configuration; - -import org.apache.sqoop.importjob.ImportJobTestConfiguration; - -import java.util.ArrayList; -import java.util.List; - -public class MySQLImportJobTestConfiguration implements ImportJobTestConfiguration { - - @Override - public String[] getTypes() { - String[] columnTypes = {"INT", "NUMERIC", "NUMERIC(20)", "NUMERIC(20,5)", "NUMERIC(20,0)", "NUMERIC(65,5)", - "DECIMAL", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,0)", "DECIMAL(65,5)"}; - return columnTypes; - } - - @Override - public String[] getNames() { - String[] columnNames = {"ID", "N1", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; - return columnNames; - } - - @Override - public List<String[]> getSampleData() { - List<String[]> inputData = new ArrayList<>(); - inputData.add(new String[]{"1", "100.030", "1000000.05", "1000000.05", "1000000.05", "1000000.05", - "100.040", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); - return inputData; - } - - - @Override - public String[] getExpectedResults() { - String expectedRecord = "{\"ID\": 1, \"N1\": 100, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + - "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; - String[] expectedResult = new String[1]; - expectedResult[0] = expectedRecord; - return expectedResult; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfiguration.java deleted file mode 100644 index b7bad08..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfiguration.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro.configuration; - -import org.apache.sqoop.importjob.ImportJobTestConfiguration; - -import java.util.ArrayList; -import java.util.List; - -/** - * This test configuration intends to cover the fact that oracle stores these types without padding them with 0s, - * therefore when importing into avro, one has to use the padding feature. - */ -public class OracleImportJobTestConfiguration implements ImportJobTestConfiguration { - - @Override - public String[] getTypes() { - return new String[]{"INT", "NUMBER(20)", "NUMBER(20,5)", "NUMBER(20,-5)", "NUMBER(*,5)", - "DECIMAL", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,-5)", "DECIMAL(*,5)"}; - } - - @Override - public String[] getNames() { - return new String[]{"ID", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; - } - - @Override - public List<String[]> getSampleData() { - List<String[]> data = new ArrayList<>(); - data.add(new String[]{"1", "1000000.05", "1000000.05", "1000000.05", "1000000.05", - "100.02", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); - return data; - } - - @Override - public String[] getExpectedResults() { - String expectedRecord = "{\"ID\": 1, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + - "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; - String[] expectedResult = new String[1]; - expectedResult[0] = expectedRecord; - return expectedResult; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfigurationForNumber.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfigurationForNumber.java b/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfigurationForNumber.java deleted file mode 100644 index 465e61f..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/configuration/OracleImportJobTestConfigurationForNumber.java +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro.configuration; - -import org.apache.sqoop.importjob.ImportJobTestConfiguration; - -import java.util.ArrayList; -import java.util.List; - -/** - * This test configuration covers NUMBER without a defined precision and scale. - * This is the type that is probably the most commonly used to store numbers and also the most problematic, - * as Sqoop sees this type with a 0 precision and -127 scale, both invalid values. - * Therefore, NUMBER requires special treatment. - * The user has to specify precision and scale when importing into avro. - */ -public class OracleImportJobTestConfigurationForNumber implements ImportJobTestConfiguration { - - - @Override - public String[] getTypes() { - return new String[]{"INT", "NUMBER", "NUMBER(20)", "NUMBER(20,5)"}; - } - - @Override - public String[] getNames() { - return new String[]{"ID", "N1", "N2", "N3"}; - } - - @Override - public List<String[]> getSampleData() { - List<String[]> data = new ArrayList<>(); - data.add(new String[]{"1", "100.01", "100.01", "100.03"}); - return data; - } - - @Override - public String[] getExpectedResults() { - String expectedRecord = "{\"ID\": 1, \"N1\": 100.010, \"N2\": 100, \"N3\": 100.03000}"; - String[] expectedResult = new String[1]; - expectedResult[0] = expectedRecord; - return expectedResult; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationForNumeric.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationForNumeric.java b/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationForNumeric.java deleted file mode 100644 index 66715c1..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationForNumeric.java +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro.configuration; - -import org.apache.sqoop.importjob.ImportJobTestConfiguration; - -import java.util.ArrayList; -import java.util.List; - -/** - * This test configuration covers the case when postgres returns invalid values - * for precision and scale for NUMERIC. Also, important, that the accompanying columns - * - NUMERIC(20) and NUMERIC(20, 5) don't get modified. - */ -public class PostgresqlImportJobTestConfigurationForNumeric implements ImportJobTestConfiguration { - - @Override - public String[] getTypes() { - String[] columnTypes = {"INT", "NUMERIC", "NUMERIC(20)", "NUMERIC(20, 5)"}; - return columnTypes; - } - - @Override - public String[] getNames() { - String[] columnNames = {"ID", "N1", "N2", "N3"}; - return columnNames; - } - - @Override - public List<String[]> getSampleData() { - List<String[]> inputData = new ArrayList<>(); - inputData.add(new String[]{"1", "100.01", "100.01", "100.01"}); - return inputData; - } - - @Override - public String[] getExpectedResults() { - String expectedRecord = "{\"ID\": 1, \"N1\": 100.010, \"N2\": 100, \"N3\": 100.01000}"; - String[] expectedResult = new String[1]; - expectedResult[0] = expectedRecord; - return expectedResult; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java b/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java deleted file mode 100644 index ec4db41..0000000 --- a/src/test/org/apache/sqoop/importjob/avro/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.sqoop.importjob.avro.configuration; - -import org.apache.sqoop.importjob.ImportJobTestConfiguration; - -import java.util.ArrayList; -import java.util.List; - -public class PostgresqlImportJobTestConfigurationPaddingShouldSucceed implements ImportJobTestConfiguration { - - @Override - public String[] getTypes() { - String[] columnTypes = {"INT", "NUMERIC(20)", "NUMERIC(20,5)", "NUMERIC(20,0)", "NUMERIC(1000,5)", - "DECIMAL(20)", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,0)", "DECIMAL(1000,5)"}; - return columnTypes; - } - - @Override - public String[] getNames() { - String[] columnNames = {"ID", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; - return columnNames; - } - - @Override - public List<String[]> getSampleData() { - List<String[]> inputData = new ArrayList<>(); - inputData.add(new String[]{"1", "1000000.05", "1000000.05", "1000000.05", "1000000.05", - "100.02", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); - return inputData; - } - - @Override - public String[] getExpectedResults() { - String expectedRecord = "{\"ID\": 1, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + - "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; - String[] expectedResult = new String[1]; - expectedResult[0] = expectedRecord; - return expectedResult; - } - - @Override - public String toString() { - return getClass().getSimpleName(); - } -} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/AvroTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/AvroTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/AvroTestConfiguration.java new file mode 100644 index 0000000..1008899 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/AvroTestConfiguration.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +public interface AvroTestConfiguration extends ImportJobTestConfiguration { + + String[] getExpectedResultsForAvro(); +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java index f137b56..e99b526 100644 --- a/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java +++ b/src/test/org/apache/sqoop/importjob/configuration/GenericImportJobSplitByTestConfiguration.java @@ -19,7 +19,6 @@ package org.apache.sqoop.importjob.configuration; import org.apache.commons.lang3.StringUtils; -import org.apache.sqoop.importjob.ImportJobTestConfiguration; import java.util.ArrayList; import java.util.List; @@ -28,7 +27,7 @@ import java.util.List; * This test configuration intends to cover the fact that oracle stores these types without padding them with 0s, * therefore when importing into avro, one has to use the padding feature. */ -public class GenericImportJobSplitByTestConfiguration implements ImportJobTestConfiguration { +public class GenericImportJobSplitByTestConfiguration implements ImportJobTestConfiguration, ParquetTestConfiguration { public static final String NAME_COLUMN = "NAME"; public static final char SEPARATOR = ','; @@ -65,7 +64,7 @@ public class GenericImportJobSplitByTestConfiguration implements ImportJobTestCo } @Override - public String[] getExpectedResults() { + public String[] getExpectedResultsForParquet() { return data.stream() .map(element -> StringUtils.join(element, SEPARATOR)) .toArray(String[]::new); http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/ImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/ImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/ImportJobTestConfiguration.java new file mode 100644 index 0000000..a57f2f5 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/ImportJobTestConfiguration.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.List; + +public interface ImportJobTestConfiguration { + + String[] getTypes(); + + String[] getNames(); + + List<String[]> getSampleData(); +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/MSSQLServerImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/MSSQLServerImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/MSSQLServerImportJobTestConfiguration.java new file mode 100644 index 0000000..4ad7def --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/MSSQLServerImportJobTestConfiguration.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.ArrayList; +import java.util.List; + +public class MSSQLServerImportJobTestConfiguration implements ImportJobTestConfiguration, AvroTestConfiguration, ParquetTestConfiguration { + + @Override + public String[] getTypes() { + String[] columnTypes = {"INT", "NUMERIC", "NUMERIC(20)", "NUMERIC(20,5)", "NUMERIC(20,0)", "NUMERIC(38,5)", + "DECIMAL", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,0)", "DECIMAL(38,5)"}; + return columnTypes; + } + + @Override + public String[] getNames() { + String[] columnNames = {"ID", "N1", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; + return columnNames; + } + + @Override + public List<String[]> getSampleData() { + List<String[]> inputData = new ArrayList<>(); + inputData.add(new String[]{"1", "100.050", "1000000.05", "1000000.05", "1000000.05", "1000000.05", + "100.060", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); + return inputData; + } + + @Override + public String[] getExpectedResultsForAvro() { + String expectedRecord = "{\"ID\": 1, \"N1\": 100, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + + "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String[] getExpectedResultsForParquet() { + String expectedRecord = "1,100,1000000,1000000.05000,1000000,1000000.05000,100,1000000,1000000.05000,1000000,1000000.05000"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/MySQLImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/MySQLImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/MySQLImportJobTestConfiguration.java new file mode 100644 index 0000000..fbcbdeb --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/MySQLImportJobTestConfiguration.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.ArrayList; +import java.util.List; + +public class MySQLImportJobTestConfiguration implements ImportJobTestConfiguration, AvroTestConfiguration, ParquetTestConfiguration { + + @Override + public String[] getTypes() { + String[] columnTypes = {"INT", "NUMERIC", "NUMERIC(20)", "NUMERIC(20,5)", "NUMERIC(20,0)", "NUMERIC(65,5)", + "DECIMAL", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,0)", "DECIMAL(65,5)"}; + return columnTypes; + } + + @Override + public String[] getNames() { + String[] columnNames = {"ID", "N1", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; + return columnNames; + } + + @Override + public List<String[]> getSampleData() { + List<String[]> inputData = new ArrayList<>(); + inputData.add(new String[]{"1", "100.030", "1000000.05", "1000000.05", "1000000.05", "1000000.05", + "100.040", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); + return inputData; + } + + @Override + public String[] getExpectedResultsForAvro() { + String expectedRecord = "{\"ID\": 1, \"N1\": 100, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + + "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String[] getExpectedResultsForParquet() { + String expectedRecord = "1,100,1000000,1000000.05000,1000000,1000000.05000,100,1000000,1000000.05000,1000000,1000000.05000"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfiguration.java new file mode 100644 index 0000000..303a523 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfiguration.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.ArrayList; +import java.util.List; + +/** + * This test configuration intends to cover the fact that oracle stores these types without padding them with 0s, + * therefore when importing into avro, one has to use the padding feature. + */ +public class OracleImportJobTestConfiguration implements ImportJobTestConfiguration, AvroTestConfiguration, ParquetTestConfiguration { + + @Override + public String[] getTypes() { + return new String[]{"INT", "NUMBER(20)", "NUMBER(20,5)", "NUMBER(20,-5)", "NUMBER(*,5)", + "DECIMAL", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,-5)", "DECIMAL(*,5)"}; + } + + @Override + public String[] getNames() { + return new String[]{"ID", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; + } + + @Override + public List<String[]> getSampleData() { + List<String[]> data = new ArrayList<>(); + data.add(new String[]{"1", "1000000.05", "1000000.05", "1000000.05", "1000000.05", + "100.02", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); + return data; + } + + @Override + public String[] getExpectedResultsForAvro() { + String expectedRecord = "{\"ID\": 1, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + + "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String[] getExpectedResultsForParquet() { + String expectedRecord = "1,1000000,1000000.05000,1000000,1000000.05000,100,1000000,1000000.05000,1000000,1000000.05000"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfigurationForNumber.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfigurationForNumber.java b/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfigurationForNumber.java new file mode 100644 index 0000000..96dd077 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfigurationForNumber.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.ArrayList; +import java.util.List; + +/** + * This test configuration covers NUMBER without a defined precision and scale. + * This is the type that is probably the most commonly used to store numbers and also the most problematic, + * as Sqoop sees this type with a 0 precision and -127 scale, both invalid values. + * Therefore, NUMBER requires special treatment. + * The user has to specify precision and scale when importing into avro. + */ +public class OracleImportJobTestConfigurationForNumber implements ImportJobTestConfiguration, AvroTestConfiguration, ParquetTestConfiguration { + + + @Override + public String[] getTypes() { + return new String[]{"INT", "NUMBER", "NUMBER(20)", "NUMBER(20,5)"}; + } + + @Override + public String[] getNames() { + return new String[]{"ID", "N1", "N2", "N3"}; + } + + @Override + public List<String[]> getSampleData() { + List<String[]> data = new ArrayList<>(); + data.add(new String[]{"1", "100.01", "100.01", "100.03"}); + return data; + } + + @Override + public String[] getExpectedResultsForAvro() { + String expectedRecord = "{\"ID\": 1, \"N1\": 100.010, \"N2\": 100, \"N3\": 100.03000}"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String[] getExpectedResultsForParquet() { + String expectedRecord = "1,100.010,100,100.03000"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/ParquetTestConfiguration.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/ParquetTestConfiguration.java b/src/test/org/apache/sqoop/importjob/configuration/ParquetTestConfiguration.java new file mode 100644 index 0000000..3c161d1 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/ParquetTestConfiguration.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +public interface ParquetTestConfiguration extends ImportJobTestConfiguration{ + + String[] getExpectedResultsForParquet(); +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationForNumeric.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationForNumeric.java b/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationForNumeric.java new file mode 100644 index 0000000..8ba0bdc --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationForNumeric.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.ArrayList; +import java.util.List; + +/** + * This test configuration covers the case when postgres returns invalid values + * for precision and scale for NUMERIC. Also, important, that the accompanying columns + * - NUMERIC(20) and NUMERIC(20, 5) don't get modified. + */ +public class PostgresqlImportJobTestConfigurationForNumeric implements ImportJobTestConfiguration, AvroTestConfiguration, ParquetTestConfiguration { + + @Override + public String[] getTypes() { + String[] columnTypes = {"INT", "NUMERIC", "NUMERIC(20)", "NUMERIC(20, 5)"}; + return columnTypes; + } + + @Override + public String[] getNames() { + String[] columnNames = {"ID", "N1", "N2", "N3"}; + return columnNames; + } + + @Override + public List<String[]> getSampleData() { + List<String[]> inputData = new ArrayList<>(); + inputData.add(new String[]{"1", "100.01", "100.01", "100.01"}); + return inputData; + } + + @Override + public String[] getExpectedResultsForAvro() { + String expectedRecord = "{\"ID\": 1, \"N1\": 100.010, \"N2\": 100, \"N3\": 100.01000}"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String[] getExpectedResultsForParquet() { + String expectedRecord = "1,100.010,100,100.01000"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java b/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java new file mode 100644 index 0000000..45eaf04 --- /dev/null +++ b/src/test/org/apache/sqoop/importjob/configuration/PostgresqlImportJobTestConfigurationPaddingShouldSucceed.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.importjob.configuration; + +import java.util.ArrayList; +import java.util.List; + +public class PostgresqlImportJobTestConfigurationPaddingShouldSucceed implements ImportJobTestConfiguration, AvroTestConfiguration, ParquetTestConfiguration { + + @Override + public String[] getTypes() { + String[] columnTypes = {"INT", "NUMERIC(20)", "NUMERIC(20,5)", "NUMERIC(20,0)", "NUMERIC(1000,5)", + "DECIMAL(20)", "DECIMAL(20)", "DECIMAL(20,5)", "DECIMAL(20,0)", "DECIMAL(1000,5)"}; + return columnTypes; + } + + @Override + public String[] getNames() { + String[] columnNames = {"ID", "N2", "N3", "N4", "N5", "D1", "D2", "D3", "D4", "D5"}; + return columnNames; + } + + @Override + public List<String[]> getSampleData() { + List<String[]> inputData = new ArrayList<>(); + inputData.add(new String[]{"1", "1000000.05", "1000000.05", "1000000.05", "1000000.05", + "100.02", "1000000.05", "1000000.05", "1000000.05", "1000000.05"}); + return inputData; + } + + @Override + public String[] getExpectedResultsForAvro() { + String expectedRecord = "{\"ID\": 1, \"N2\": 1000000, \"N3\": 1000000.05000, \"N4\": 1000000, \"N5\": 1000000.05000, " + + "\"D1\": 100, \"D2\": 1000000, \"D3\": 1000000.05000, \"D4\": 1000000, \"D5\": 1000000.05000}"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String[] getExpectedResultsForParquet() { + String expectedRecord = "1,1000000,1000000.05000,1000000,1000000.05000,100,1000000,1000000.05000,1000000,1000000.05000"; + String[] expectedResult = new String[1]; + expectedResult[0] = expectedRecord; + return expectedResult; + } + + @Override + public String toString() { + return getClass().getSimpleName(); + } +} http://git-wip-us.apache.org/repos/asf/sqoop/blob/00a02dec/src/test/org/apache/sqoop/util/ParquetReader.java ---------------------------------------------------------------------- diff --git a/src/test/org/apache/sqoop/util/ParquetReader.java b/src/test/org/apache/sqoop/util/ParquetReader.java index 908ce56..727be58 100644 --- a/src/test/org/apache/sqoop/util/ParquetReader.java +++ b/src/test/org/apache/sqoop/util/ParquetReader.java @@ -18,6 +18,8 @@ package org.apache.sqoop.util; +import org.apache.avro.Conversions; +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; @@ -29,7 +31,9 @@ import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.hadoop.util.HiddenFileFilter; +import org.apache.parquet.schema.MessageType; import java.io.IOException; import java.util.ArrayDeque; @@ -65,7 +69,7 @@ public class ParquetReader implements AutoCloseable { this(pathToRead, new Configuration()); } - public GenericRecord next() throws IOException { + private GenericRecord next() throws IOException { GenericRecord result = reader.read(); if (result != null) { return result; @@ -113,29 +117,38 @@ public class ParquetReader implements AutoCloseable { } public CompressionCodecName getCodec() { - List<Footer> footers = getFooters(); + ParquetMetadata parquetMetadata = getParquetMetadata(); - Iterator<Footer> footersIterator = footers.iterator(); - if (footersIterator.hasNext()) { - Footer footer = footersIterator.next(); + Iterator<BlockMetaData> blockMetaDataIterator = parquetMetadata.getBlocks().iterator(); + if (blockMetaDataIterator.hasNext()) { + BlockMetaData blockMetaData = blockMetaDataIterator.next(); - Iterator<BlockMetaData> blockMetaDataIterator = footer.getParquetMetadata().getBlocks().iterator(); - if (blockMetaDataIterator.hasNext()) { - BlockMetaData blockMetaData = blockMetaDataIterator.next(); + Iterator<ColumnChunkMetaData> columnChunkMetaDataIterator = blockMetaData.getColumns().iterator(); - Iterator<ColumnChunkMetaData> columnChunkMetaDataIterator = blockMetaData.getColumns().iterator(); + if (columnChunkMetaDataIterator.hasNext()) { + ColumnChunkMetaData columnChunkMetaData = columnChunkMetaDataIterator.next(); - if (columnChunkMetaDataIterator.hasNext()) { - ColumnChunkMetaData columnChunkMetaData = columnChunkMetaDataIterator.next(); - - return columnChunkMetaData.getCodec(); - } + return columnChunkMetaData.getCodec(); } } return null; } + public MessageType readParquetSchema() { + try { + ParquetMetadata parquetMetadata = getParquetMetadata(); + + return parquetMetadata.getFileMetaData().getSchema(); + } finally { + close(); + } + } + + private ParquetMetadata getParquetMetadata() { + return getFooters().stream().findFirst().get().getParquetMetadata(); + } + private List<Footer> getFooters() { final List<Footer> footers; try { @@ -163,7 +176,8 @@ public class ParquetReader implements AutoCloseable { if (reader != null) { reader.close(); } - this.reader = AvroParquetReader.<GenericRecord>builder(file).build(); + GenericData.get().addLogicalTypeConversion(new Conversions.DecimalConversion()); + this.reader = AvroParquetReader.<GenericRecord>builder(file).withDataModel(GenericData.get()).build(); } catch (IOException e) { throw new RuntimeException(e); }
