This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 8de012bc2ce [MINOR] Use HoodieStorage and StorageConf in
HoodieCatalogTable (#12181)
8de012bc2ce is described below
commit 8de012bc2ce187a3d64a9dd050b1a2f89d56955d
Author: Shawn Chang <[email protected]>
AuthorDate: Thu Oct 31 02:31:42 2024 -0700
[MINOR] Use HoodieStorage and StorageConf in HoodieCatalogTable (#12181)
Co-authored-by: Shawn Chang <[email protected]>
---
.../spark/sql/catalyst/catalog/HoodieCatalogTable.scala | 16 +++++++++-------
.../org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala | 15 +++++++--------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
index 71580250d7d..1a23356381f 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala
@@ -26,13 +26,14 @@ import org.apache.hudi.common.model.HoodieTableType
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
import org.apache.hudi.common.table.HoodieTableConfig.URL_ENCODE_PARTITIONING
import org.apache.hudi.common.table.timeline.TimelineUtils
-import org.apache.hudi.common.util.{ConfigUtils, StringUtils}
+import org.apache.hudi.common.util.StringUtils
import org.apache.hudi.common.util.ValidationUtils.checkArgument
import org.apache.hudi.hadoop.fs.HadoopFSUtils
import org.apache.hudi.keygen.constant.{KeyGeneratorOptions, KeyGeneratorType}
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
-import org.apache.hudi.util.JFunction
+import org.apache.hudi.storage.HoodieStorageUtils
import org.apache.hudi.util.SparkConfigUtils
+
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.avro.SchemaConverters
@@ -58,7 +59,7 @@ class HoodieCatalogTable(val spark: SparkSession, var table:
CatalogTable) exten
checkArgument(table.provider.map(_.toLowerCase(Locale.ROOT)).orNull ==
"hudi", s" ${table.qualifiedName} is not a Hudi table")
- private val hadoopConf = spark.sessionState.newHadoopConf
+ private val storageConf =
HadoopFSUtils.getStorageConfWithCopy(spark.sessionState.newHadoopConf)
/**
* database.table in catalog
@@ -79,14 +80,15 @@ class HoodieCatalogTable(val spark: SparkSession, var
table: CatalogTable) exten
/**
* A flag to whether the hoodie table exists.
*/
- val hoodieTableExists: Boolean = tableExistsInPath(tableLocation, hadoopConf)
+ val hoodieTableExists: Boolean =
+ tableExistsInPath(tableLocation,
HoodieStorageUtils.getStorage(tableLocation, storageConf))
/**
* Meta Client.
*/
lazy val metaClient: HoodieTableMetaClient = HoodieTableMetaClient.builder()
.setBasePath(tableLocation)
- .setConf(HadoopFSUtils.getStorageConfWithCopy(hadoopConf))
+ .setConf(storageConf)
.build()
/**
@@ -209,7 +211,7 @@ class HoodieCatalogTable(val spark: SparkSession, var
table: CatalogTable) exten
.fromProperties(properties)
.setDatabaseName(catalogDatabaseName)
.setTableCreateSchema(SchemaConverters.toAvroType(dataSchema,
recordName = recordName).toString())
- .initTable(HadoopFSUtils.getStorageConfWithCopy(hadoopConf),
tableLocation)
+ .initTable(storageConf, tableLocation)
} else {
val (recordName, namespace) =
AvroConversionUtils.getAvroRecordNameAndNamespace(table.identifier.table)
val schema = SchemaConverters.toAvroType(dataSchema, nullable = false,
recordName, namespace)
@@ -227,7 +229,7 @@ class HoodieCatalogTable(val spark: SparkSession, var
table: CatalogTable) exten
.setTableName(table.identifier.table)
.setTableCreateSchema(schema.toString())
.setPartitionFields(partitionColumns)
- .initTable(HadoopFSUtils.getStorageConfWithCopy(hadoopConf),
tableLocation)
+ .initTable(storageConf, tableLocation)
}
}
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala
index 153a4cd65aa..59daefa5426 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala
@@ -30,7 +30,8 @@ import
org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstan
import
org.apache.hudi.common.table.timeline.HoodieActiveTimeline.parseDateFromInstantTime
import org.apache.hudi.common.util.PartitionPathEncodeUtils
import org.apache.hudi.exception.HoodieException
-import org.apache.hudi.storage.{HoodieStorage, StoragePathInfo}
+import org.apache.hudi.storage.{HoodieStorage, StoragePath, StoragePathInfo}
+
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
@@ -43,7 +44,6 @@ import org.apache.spark.sql.types._
import java.net.URI
import java.text.SimpleDateFormat
import java.util.Locale
-
import scala.collection.JavaConverters._
import scala.util.Try
@@ -176,7 +176,7 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport {
val uri = if (isManaged) {
Some(sparkSession.sessionState.catalog.defaultTablePath(identifier))
} else {
- Some(new Path(location.get).toUri)
+ Some(new StoragePath(location.get).toUri)
}
getTableLocation(uri, identifier, sparkSession)
}
@@ -214,11 +214,10 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport {
/**
* Check if the hoodie.properties exists in the table path.
*/
- def tableExistsInPath(tablePath: String, conf: Configuration): Boolean = {
- val basePath = new Path(tablePath)
- val fs = basePath.getFileSystem(conf)
- val metaPath = new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME)
- fs.exists(metaPath)
+ def tableExistsInPath(tablePath: String, storage: HoodieStorage): Boolean = {
+ val basePath = new StoragePath(tablePath)
+ val metaPath = new StoragePath(basePath,
HoodieTableMetaClient.METAFOLDER_NAME)
+ storage.exists(metaPath)
}
/**