alexeykudinkin commented on code in PR #7528:
URL: https://github.com/apache/hudi/pull/7528#discussion_r1083321386
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala:
##########
@@ -78,14 +80,52 @@ object HoodieCatalystExpressionUtils {
* NOTE: Projection of the row from [[StructType]] A to [[StructType]] B is
only possible, if
* B is a subset of A
*/
- def generateUnsafeProjection(from: StructType, to: StructType):
UnsafeProjection = {
- val attrs = from.toAttributes
- val attrsMap = attrs.map(attr => (attr.name, attr)).toMap
- val targetExprs = to.fields.map(f => attrsMap(f.name))
+ def generateUnsafeProjection(sourceStructType: StructType, targetStructType:
StructType): UnsafeProjection = {
+ val resolver = SQLConf.get.resolver
+ val attrs = sourceStructType.toAttributes
+ val targetExprs = targetStructType.fields.map { targetField =>
+ val attrRef = attrs.find(attr => resolver(attr.name, targetField.name))
+ .getOrElse(throw new AnalysisException(s"Wasn't able to match target
field `${targetField.name}` to any of the source attributes ($attrs)"))
+
+ genProjectingExpression(attrRef, targetField.dataType)
+ }
GenerateUnsafeProjection.generate(targetExprs, attrs)
}
+ private def genProjectingExpression(sourceExpr: Expression,
+ targetDataType: DataType): Expression = {
+ checkState(sourceExpr.resolved)
+
+ // TODO support array, map
+ (sourceExpr.dataType, targetDataType) match {
+ case (sdt, tdt) if sdt == tdt =>
+ sourceExpr
+
+ case (sourceType: StructType, targetType: StructType) =>
+ val fieldValueExprs = targetType.fields.map { tf =>
Review Comment:
Realized that this is actually not the right approach and the problem is
elsewhere -- the problem was the following:
- We were simply not reading projected records from Parquet. The reason is
that when a non-whitelisted RecordPayload is used, we fall back to reading
the full record, but we were still allowing `NestedSchemaPruning` to be
applied.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]