rahil-c commented on code in PR #18328:
URL: https://github.com/apache/hudi/pull/18328#discussion_r2994736960


##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala:
##########
@@ -290,4 +307,50 @@ object SparkFileFormatInternalRowReaderContext {
     field.name.equals(ROW_INDEX_TEMPORARY_COLUMN_NAME)
   }
 
+  /**
+   * Detects VECTOR columns from HoodieSchema.
+   * Delegates to [[VectorConversionUtils.detectVectorColumns]].
+   * @return Map of ordinal to Vector schema for VECTOR fields.
+   */
+  private[hudi] def detectVectorColumns(schema: HoodieSchema): Map[Int, 
HoodieSchema.Vector] = {
+    VectorConversionUtils.detectVectorColumns(schema).asScala.map { case (k, 
v) => (k.intValue(), v) }.toMap
+  }
+
+  /**
+   * Detects VECTOR columns from Spark StructType metadata.
+   * Delegates to [[VectorConversionUtils.detectVectorColumnsFromMetadata]].
+   * @return Map of ordinal to Vector schema for VECTOR fields.
+   */
+  def detectVectorColumnsFromMetadata(schema: StructType): Map[Int, 
HoodieSchema.Vector] = {
+    VectorConversionUtils.detectVectorColumnsFromMetadata(schema).asScala.map 
{ case (k, v) => (k.intValue(), v) }.toMap
+  }
+
+  /**
+   * Replaces ArrayType with BinaryType for VECTOR columns so the Parquet reader
+   * can read FIXED_LEN_BYTE_ARRAY data without type mismatch.
+   * Delegates to [[VectorConversionUtils.replaceVectorColumnsWithBinary]].
+   */
+  def replaceVectorColumnsWithBinary(structType: StructType, vectorColumns: 
Map[Int, HoodieSchema.Vector]): StructType = {
+    val javaMap = vectorColumns.map { case (k, v) => (Integer.valueOf(k), 
v.asInstanceOf[AnyRef]) }.asJava
+    VectorConversionUtils.replaceVectorColumnsWithBinary(structType, javaMap)
+  }
+
+  /**
+   * Wraps an iterator to convert binary VECTOR columns back to typed arrays.
+   * Unpacks bytes from FIXED_LEN_BYTE_ARRAY into GenericArrayData using the canonical vector byte order.
+   */
+  private[hudi] def wrapWithVectorConversion(
+      iterator: ClosableIterator[InternalRow],
+      vectorColumns: Map[Int, HoodieSchema.Vector],
+      readSchema: StructType): ClosableIterator[InternalRow] = {
+    val javaVectorCols: java.util.Map[Integer, HoodieSchema.Vector] =
+      vectorColumns.map { case (k, v) => (Integer.valueOf(k), v) }.asJava
+    val mapper = VectorConversionUtils.buildRowMapper(readSchema, 
javaVectorCols, row => row)

Review Comment:
   Will look into fixing this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to