huaxingao commented on a change in pull request #33639:
URL: https://github.com/apache/spark/pull/33639#discussion_r687325363
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet,
we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark
layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct an InternalRow from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x =>
x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
Review comment:
Seems to me that `ParquetRowConverter` converts a parquet row, but I am
building a row from scratch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]