This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-0.11.1-rc2-prep in repository https://gitbox.apache.org/repos/asf/hudi.git
commit cd1514484fff1ae6e66b4e56ae11013bdf4ac6e9 Author: Y Ethan Guo <[email protected]> AuthorDate: Sat Jun 11 13:19:24 2022 -0700 [HUDI-4223] Fix NullPointerException from getLogRecordScanner when reading metadata table (#5840) When explicitly specifying the metadata table path for reading in spark, the "hoodie.metadata.enable" is overwritten to true for proper read behavior. --- .../src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala | 6 +++--- .../hudi/functional/TestMetadataTableWithSparkDataSource.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala index a7ca60865f..2fdb9b882e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapred.JobConf import org.apache.hudi.HoodieConversionUtils.{toJavaOption, toScalaOption} import org.apache.hudi.HoodieMergeOnReadRDD.{AvroDeserializerSupport, collectFieldOrdinals, getPartitionPath, projectAvro, projectAvroUnsafe, projectRowUnsafe, resolveAvroSchemaNullability} -import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath import org.apache.hudi.common.config.HoodieMetadataConfig import org.apache.hudi.common.engine.HoodieLocalEngineContext import org.apache.hudi.common.fs.FSUtils @@ -37,9 +36,9 @@ import org.apache.hudi.config.HoodiePayloadConfig import org.apache.hudi.exception.HoodieException import org.apache.hudi.hadoop.config.HoodieRealtimeConfig import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes +import org.apache.hudi.internal.schema.InternalSchema import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata} -import org.apache.hudi.internal.schema.InternalSchema import org.apache.spark.rdd.RDD import org.apache.spark.sql.avro.HoodieAvroDeserializer import org.apache.spark.sql.catalyst.InternalRow @@ -324,7 +323,8 @@ private object HoodieMergeOnReadRDD { val fs = FSUtils.getFs(tablePath, hadoopConf) if (HoodieTableMetadata.isMetadataTable(tablePath)) { - val metadataConfig = tableState.metadataConfig + val metadataConfig = HoodieMetadataConfig.newBuilder() + .fromProperties(tableState.metadataConfig.getProps).enable(true).build() val dataTableBasePath = getDataTableBasePathFromMetadataTable(tablePath) val metadataTable = new HoodieBackedTableMetadata( new HoodieLocalEngineContext(hadoopConf), metadataConfig, diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala index 11705f9eb1..02e0ee6dfd 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala @@ -78,7 +78,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn .save(basePath) // Files partition of MT - val filesPartitionDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/files") + val filesPartitionDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/files") // Smoke test filesPartitionDF.show() @@ -96,7 +96,7 @@ class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarn assertEquals(expectedKeys, keys) // Column Stats Index partition of MT - val colStatsDF = spark.read.options(metadataOpts).format(hudi).load(s"$basePath/.hoodie/metadata/column_stats") + val colStatsDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata/column_stats") // Smoke test colStatsDF.show()
