codope commented on code in PR #11534:
URL: https://github.com/apache/hudi/pull/11534#discussion_r1763355565


##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieDataTypeUtils.scala:
##########
@@ -30,26 +38,71 @@ object HoodieDataTypeUtils {
   def parseStructTypeFromJson(jsonSchema: String): StructType =
     StructType.fromString(jsonSchema)
 
+  def canUseRowWriter(schema: Schema, conf: Configuration): Boolean = {
+    if (conf.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, true)) {
+      // if we can write lists with the old list structure, we can use row 
writer regardless of decimal precision
+      true
+    } else if (!HoodieAvroUtils.hasSmallPrecisionDecimalField(schema)) {
+      true
+    } else {
+      // small precision decimals require the legacy write mode but lists and 
maps require the new write mode when
+      // WRITE_OLD_LIST_STRUCTURE is false so we can only use row writer if 
one is present and the other is not
+      if (HoodieAvroUtils.hasListOrMapField(schema)) {
+        log.warn("Cannot use row writer due to presence of list or map with a 
small precision decimal field")
+        false
+      } else {
+        true
+      }
+    }
+  }
+
   /**
-   * Checks whether provided {@link DataType} contains {@link DecimalType} 
whose scale is less than
-   * {@link Decimal#MAX_LONG_DIGITS()}
+   * Checks whether default value (false) of 
"hoodie.parquet.writelegacyformat.enabled" should be
+   * overridden in case:
+   *
+   * <ul>
+   * <li>Property has not been explicitly set by the writer</li>
+   * <li>Data schema contains {@code DecimalType} that would be affected by 
it</li>
+   * </ul>
+   *
+   * If both of the aforementioned conditions are true, will override the 
default value of the config
+   * (by essentially setting the value) to make sure that the produced Parquet 
data files could be
+   * read by {@code AvroParquetReader}
+   *
+   * @param properties properties specified by the writer
+   * @param schema     schema of the dataset being written
    */
-  def hasSmallPrecisionDecimalType(sparkType: DataType): Boolean = {
-    sparkType match {
-      case st: StructType =>
-        st.exists(f => hasSmallPrecisionDecimalType(f.dataType))
-
-      case map: MapType =>
-        hasSmallPrecisionDecimalType(map.keyType) ||
-          hasSmallPrecisionDecimalType(map.valueType)
-
-      case at: ArrayType =>
-        hasSmallPrecisionDecimalType(at.elementType)
+  def tryOverrideParquetWriteLegacyFormatProperty(properties: 
java.util.Map[String, String], schema: StructType): Unit = {
+    tryOverrideParquetWriteLegacyFormatProperty(Left(properties),
+      AvroConversionUtils.convertStructTypeToAvroSchema(schema, "struct", 
"hoodie.source"))
+  }
 
-      case dt: DecimalType =>
-        dt.precision < Decimal.MAX_LONG_DIGITS
+  def tryOverrideParquetWriteLegacyFormatProperty(properties: TypedProperties, 
schema: Schema): Unit = {
+    tryOverrideParquetWriteLegacyFormatProperty(Right(properties), schema)
+  }
 
-      case _ => false
+  private def tryOverrideParquetWriteLegacyFormatProperty(properties: 
Either[java.util.Map[String, String], TypedProperties], schema: Schema): Unit = 
{
+    val legacyFormatEnabledProp = properties match {
+      case Left(map) => 
map.get(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key)
+      case Right(typedProps) => 
typedProps.get(HoodieStorageConfig.PARQUET_WRITE_LEGACY_FORMAT_ENABLED.key)
+    }
+    if (legacyFormatEnabledProp == null && 
HoodieAvroUtils.hasSmallPrecisionDecimalField(schema)) {
+      // ParquetWriteSupport writes DecimalType to parquet as INT32/INT64 when 
the precision of decimalType
+      // is less than {@code Decimal.MAX_LONG_DIGITS}, but {@code 
AvroParquetReader} which is used by
+      // {@code HoodieParquetReader} does not support DecimalType encoded as 
INT32/INT64.

Review Comment:
   Got it, https://issues.apache.org/jira/browse/HUDI-8205 to track



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to