nsivabalan commented on code in PR #7951:
URL: https://github.com/apache/hudi/pull/7951#discussion_r1119377486
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala:
##########
@@ -295,30 +295,35 @@ object DataSourceWriteOptions {
def translateSqlOptions(optParams: Map[String, String]): Map[String, String]
= {
var translatedOptParams = optParams
// translate the api partitionBy of spark DataFrameWriter to
PARTITIONPATH_FIELD
- if (optParams.contains(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY)) {
+ // we should set hoodie's partition path only if its not set by the user.
+ if (optParams.contains(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY)
+ &&
!optParams.contains(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key())) {
val partitionColumns =
optParams.get(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY)
.map(SparkDataSourceUtils.decodePartitioningColumns)
.getOrElse(Nil)
val keyGeneratorClass =
optParams.getOrElse(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key(),
DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.defaultValue)
- val partitionPathField =
+ // if not nonpartitioned key gen
+ if (keyGeneratorClass.isEmpty ||
!keyGeneratorClass.equals(classOf[NonpartitionedKeyGenerator].getName)) {
keyGeneratorClass match {
// Only CustomKeyGenerator needs special treatment, because it needs
to be specified in a way
// such as "field1:PartitionKeyType1,field2:PartitionKeyType2".
// partitionBy can specify the partition like this:
partitionBy("p1", "p2:SIMPLE", "p3:TIMESTAMP")
case c if c == classOf[CustomKeyGenerator].getName =>
- partitionColumns.map(e => {
+ val partitionPathField = partitionColumns.map(e => {
if (e.contains(":")) {
e
} else {
s"$e:SIMPLE"
}
}).mkString(",")
+ translatedOptParams = optParams ++ Map(PARTITIONPATH_FIELD.key ->
partitionPathField)
case _ =>
- partitionColumns.mkString(",")
+ val partitionPathField = partitionColumns.mkString(",")
+ translatedOptParams = optParams ++ Map(PARTITIONPATH_FIELD.key ->
partitionPathField)
}
- translatedOptParams = optParams ++ Map(PARTITIONPATH_FIELD.key ->
partitionPathField)
Review Comment:
   Will fix.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala:
##########
@@ -295,30 +295,35 @@ object DataSourceWriteOptions {
def translateSqlOptions(optParams: Map[String, String]): Map[String, String]
= {
var translatedOptParams = optParams
// translate the api partitionBy of spark DataFrameWriter to
PARTITIONPATH_FIELD
- if (optParams.contains(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY)) {
+ // we should set hoodie's partition path only if its not set by the user.
+ if (optParams.contains(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY)
Review Comment:
   Synced up directly; this behavior should be OK. Maybe we can add an entry to
   our FAQ explaining how we deduce the partitioning columns.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]