stefankandic commented on code in PR #45721:
URL: https://github.com/apache/spark/pull/45721#discussion_r1539067436


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala:
##########
@@ -52,18 +54,36 @@ class ArrayBasedMapBuilder(keyType: DataType, valueType: 
DataType) extends Seria
 
   private val mapKeyDedupPolicy = 
SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY)
 
+  private lazy val keyNeedNormalize = 
NormalizeFloatingNumbers.needNormalize(keyType)
+
+  def normalize(value: Any, dataType: DataType): Any = dataType match {
+    case FloatType => NormalizeFloatingNumbers.FLOAT_NORMALIZER(value)
+    case DoubleType => NormalizeFloatingNumbers.DOUBLE_NORMALIZER(value)
+    case ArrayType(dt, _) =>
+      new GenericArrayData(value.asInstanceOf[GenericArrayData].array.map { 
element =>
+        normalize(element, dt)
+      })
+    case StructType(sf) =>
+      new GenericInternalRow(
+        value.asInstanceOf[GenericInternalRow].values.zipWithIndex.map { 
element =>
+        normalize(element._1, sf(element._2).dataType)

Review Comment:
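   You could also check whether normalization is actually needed here, right?
   
   That way we would avoid normalizing every field of a struct when only one 
actually needs it (e.g. by checking `NormalizeFloatingNumbers.needNormalize` 
on each field's data type first).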



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to