This is an automated email from the ASF dual-hosted git repository. danny0405 pushed a commit to branch release-0.10.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 008df11e196d0ee81aa22cdd64bd27ca11d121dd Author: Alexey Kudinkin <[email protected]> AuthorDate: Fri Dec 3 04:33:38 2021 -0800 [HUDI-2911] Removing default value for `PARTITIONPATH_FIELD_NAME` resulting in incorrect `KeyGenerator` configuration (#4195) (cherry picked from commit bed7f9897a9127130c3d241df7634d44aa12167b) --- .../org/apache/hudi/exception/ExceptionUtil.java | 42 ++++++++++++++++++++++ .../hudi/keygen/constant/KeyGeneratorOptions.java | 2 +- .../scala/org/apache/hudi/DataSourceOptions.scala | 2 +- .../scala/org/apache/hudi/HoodieWriterUtils.scala | 1 - .../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 29 +++++++++++++-- 5 files changed, 71 insertions(+), 5 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/exception/ExceptionUtil.java b/hudi-common/src/main/java/org/apache/hudi/exception/ExceptionUtil.java new file mode 100644 index 0000000..a0550ba --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/exception/ExceptionUtil.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.exception; + +import javax.annotation.Nonnull; + +/** + * Class collecting common utilities helping in handling {@link Exception}s + */ +public final class ExceptionUtil { + private ExceptionUtil() {} + + /** + * Fetches inner-most cause of the provided {@link Throwable} + */ + @Nonnull + public static Throwable getRootCause(@Nonnull Throwable t) { + Throwable cause = t; + while (cause.getCause() != null) { + cause = cause.getCause(); + } + + return cause; + } + +} diff --git a/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java b/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java index 3e369a5..d8535a2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java +++ b/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java @@ -52,7 +52,7 @@ public class KeyGeneratorOptions extends HoodieConfig { public static final ConfigProperty<String> PARTITIONPATH_FIELD_NAME = ConfigProperty .key("hoodie.datasource.write.partitionpath.field") - .defaultValue("partitionpath") + .noDefaultValue() .withDocumentation("Partition path field. Value to be used at the partitionPath component of HoodieKey. " + "Actual value ontained by invoking .toString()"); diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index c1101f3..51bcd88 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -676,7 +676,7 @@ object DataSourceWriteOptions { val PARTITIONPATH_FIELD_OPT_KEY = KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key() /** @deprecated Use {@link PARTITIONPATH_FIELD} and its methods instead */ @Deprecated - val DEFAULT_PARTITIONPATH_FIELD_OPT_VAL = PARTITIONPATH_FIELD.defaultValue() + val DEFAULT_PARTITIONPATH_FIELD_OPT_VAL = null /** @deprecated Use {@link TABLE_NAME} and its methods instead */ @Deprecated diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala index 52d4a52..282de54 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala @@ -56,7 +56,6 @@ object HoodieWriterUtils { hoodieConfig.setDefaultValue(PRECOMBINE_FIELD) hoodieConfig.setDefaultValue(PAYLOAD_CLASS_NAME) hoodieConfig.setDefaultValue(RECORDKEY_FIELD) - hoodieConfig.setDefaultValue(PARTITIONPATH_FIELD) hoodieConfig.setDefaultValue(KEYGENERATOR_CLASS_NAME) hoodieConfig.setDefaultValue(ENABLE) hoodieConfig.setDefaultValue(COMMIT_METADATA_KEYPREFIX) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index 48f8408..fa248e4 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -25,8 +25,9 @@ import org.apache.hudi.common.config.HoodieConfig import org.apache.hudi.common.model._ import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.testutils.HoodieTestDataGenerator +import org.apache.hudi.common.util.PartitionPathEncodeUtils import org.apache.hudi.config.{HoodieBootstrapConfig, HoodieWriteConfig} -import org.apache.hudi.exception.HoodieException +import org.apache.hudi.exception.{ExceptionUtil, HoodieException} import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode import org.apache.hudi.functional.TestBootstrap import org.apache.hudi.hive.HiveSyncConfig @@ -40,13 +41,16 @@ import org.apache.spark.sql.hudi.command.SqlKeyGenerator import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession} import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue, fail} +import org.junit.jupiter.api.function.Executable import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.{CsvSource, EnumSource, ValueSource} import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{spy, times, verify} +import org.scalatest.Assertions.assertThrows import org.scalatest.Matchers.{assertResult, be, convertToAnyShouldWrapper, intercept} +import java.io.IOException import java.time.Instant import java.util.{Collections, Date, UUID} import scala.collection.JavaConversions._ @@ -346,7 +350,6 @@ class TestHoodieSparkSqlWriter { @Test def testInsertDatasetWithoutPrecombineField(): Unit = { - //create a new table val fooTableModifier = commonTableModifier.updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .updated(DataSourceWriteOptions.INSERT_DROP_DUPS.key, "false") @@ -376,6 +379,28 @@ class TestHoodieSparkSqlWriter { } /** + * Test case for insert dataset without partitioning field + */ + @Test + def testInsertDatasetWithoutPartitionField(): Unit = { + val tableOpts = + commonTableModifier + .updated(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + + // generate the inserts + val schema = DataSourceTestUtils.getStructTypeExampleSchema + val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema) + val records = DataSourceTestUtils.generateRandomRows(1) + val recordsSeq = convertRowListToSeq(records) + val df = spark.createDataFrame(sc.parallelize(recordsSeq), structType) + + // try write to Hudi + assertThrows[IOException] { + HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, tableOpts - DataSourceWriteOptions.PARTITIONPATH_FIELD.key, df) + } + } + + /** * Test case for bulk insert dataset with datasource impl multiple rounds. */ @Test
