lw309637554 commented on a change in pull request #2283:
URL: https://github.com/apache/hudi/pull/2283#discussion_r589520917
##########
File path:
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
##########
@@ -378,11 +389,75 @@ private[hudi] object HoodieSparkSqlWriter {
hiveSyncConfig.autoCreateDatabase =
parameters.get(HIVE_AUTO_CREATE_DATABASE_OPT_KEY).exists(r => r.toBoolean)
hiveSyncConfig.decodePartition =
parameters.getOrElse(URL_ENCODE_PARTITIONING_OPT_KEY,
DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL).toBoolean
+ hiveSyncConfig.tableProperties =
parameters.getOrElse(HIVE_TABLE_PROPERTIES, null)
+ hiveSyncConfig.serdeProperties = createSqlTableSerdeProperties(parameters,
basePath.toString,
+ hiveSyncConfig.partitionFields.size())
hiveSyncConfig
}
- private def metaSync(parameters: Map[String, String],
- basePath: Path,
+ /**
+ * Add Spark Sql related table properties to the HIVE_TABLE_PROPERTIES.
+ * @param sqlConf
+ * @param schema
+ * @param parameters
+ * @return A new parameters added the HIVE_TABLE_PROPERTIES property.
+ */
+ private def addSqlTableProperties(sqlConf: SQLConf, schema: StructType,
+ parameters: Map[String, String]):
Map[String, String] = {
+ // Convert the schema and partition info used by spark sql to hive table
properties.
+ // The following code refers to the spark code in
+ //
https://github.com/apache/spark/blob/master/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+
+ val partitionSet = parameters(HIVE_PARTITION_FIELDS_OPT_KEY)
+ .split(",").map(_.trim).filter(!_.isEmpty).toSet
+ val threshold = sqlConf.getConf(SCHEMA_STRING_LENGTH_THRESHOLD)
+
+ val (partitionCols, dataCols) = schema.partition(c =>
partitionSet.contains(c.name))
+ val reOrderedType = StructType(dataCols ++ partitionCols)
+ val schemaParts = reOrderedType.json.grouped(threshold).toSeq
+
+ var properties = Map(
+ "spark.sql.sources.provider" -> "hudi",
+ "spark.sql.sources.schema.numParts" -> schemaParts.size.toString
Review comment:
Can we persist these properties to the metatable instead of the Hive table
properties?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]