rahil-c commented on code in PR #18328:
URL: https://github.com/apache/hudi/pull/18328#discussion_r2997372654


##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedFileFormat.scala:
##########
@@ -398,30 +423,78 @@ class HoodieFileGroupReaderBasedFileFormat(tablePath: String,
     }
   }
 
+  private def detectVectorColumns(schema: StructType): Map[Int, HoodieSchema.Vector] =
+    SparkFileFormatInternalRowReaderContext.detectVectorColumnsFromMetadata(schema)
+
+  private def replaceVectorFieldsWithBinary(schema: StructType, vectorCols: Map[Int, HoodieSchema.Vector]): StructType =
+    SparkFileFormatInternalRowReaderContext.replaceVectorColumnsWithBinary(schema, vectorCols)
+
+  /**
+   * Wraps an iterator to convert binary VECTOR columns back to typed arrays.
+   * The read schema has BinaryType for vector columns; the target schema has ArrayType.
+   */
+  private def wrapWithVectorConversion(iter: Iterator[InternalRow],
+                                        readSchema: StructType,
+                                        targetSchema: StructType,
+                                        vectorCols: Map[Int, HoodieSchema.Vector]): Iterator[InternalRow] = {
+    val vectorProjection = UnsafeProjection.create(targetSchema)
+    val javaVectorCols: java.util.Map[Integer, HoodieSchema.Vector] =
+      vectorCols.map { case (k, v) => (Integer.valueOf(k), v) }.asJava
+    val mapper = VectorConversionUtils.buildRowMapper(readSchema, javaVectorCols, vectorProjection.apply(_))
+    iter.map(mapper.apply(_))
+  }
+
   // executor
   private def readBaseFile(file: PartitionedFile, parquetFileReader: SparkColumnarFileReader, requestedSchema: StructType,
                            remainingPartitionSchema: StructType, fixedPartitionIndexes: Set[Int], requiredSchema: StructType,
                            partitionSchema: StructType, outputSchema: StructType, filters: Seq[Filter],
                            storageConf: StorageConfiguration[Configuration]): Iterator[InternalRow] = {

Review Comment:
   added



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to