sunchao commented on a change in pull request #33639:
URL: https://github.com/apache/spark/pull/33639#discussion_r686372863
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet, we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics information
+ * from parquet footer file, and then construct an InternalRow from these Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation, isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x => x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ mutableRow.setByte(i, values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+ mutableRow.setShort(i, values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ mutableRow.setInt(i, values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+ mutableRow.update(i, dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Integer].toLong, d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type ${aggSchema.fields(i).dataType}" +
+ " for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ aggSchema.fields(i).dataType match {
+ case LongType =>
+ mutableRow.setLong(i, values(i).asInstanceOf[Long])
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Long], d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type ${aggSchema.fields(i).dataType}" +
+ " for INT64")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ mutableRow.setFloat(i, values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ mutableRow.setDouble(i, values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ mutableRow.setBoolean(i, values(i).asInstanceOf[Boolean])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case StringType =>
+ mutableRow.update(i, UTF8String.fromBytes(bytes))
+ case BinaryType =>
+ mutableRow.update(i, bytes)
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale), d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type ${aggSchema.fields(i).dataType}" +
+ " for Binary")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale), d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type ${aggSchema.fields(i).dataType}" +
+ " for FIXED_LEN_BYTE_ARRAY")
+ }
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ mutableRow
+ }
+
+ /**
+ * When the Aggregates (Max/Min/Count) are pushed down to parquet, in the case of
+ * PARQUET_VECTORIZED_READER_ENABLED sets to true, we don't need buildColumnarReader
+ * to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the Aggregates (Max/Min/Count) result using the statistics information
+ * from parquet footer file, and then construct a ColumnarBatch from these Aggregate results.
+ *
+ * @return Aggregate results in the format of ColumnarBatch
+ */
+ private[sql] def createAggColumnarBatchFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ offHeap: Boolean,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): ColumnarBatch = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation, isCaseSensitive)
+ val capacity = 4 * 1024
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+ val columnVectors = if (offHeap) {
+ OffHeapColumnVector.allocateColumns(capacity, aggSchema)
+ } else {
+ OnHeapColumnVector.allocateColumns(capacity, aggSchema)
+ }
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ columnVectors(i).appendByte(values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+ columnVectors(i).appendShort(values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ columnVectors(i).appendInt(values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+ columnVectors(i).appendInt(dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case _ => throw new SparkException(s"Unexpected type ${aggSchema.fields(i).dataType}" +
+ s" for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ columnVectors(i).appendLong(values(i).asInstanceOf[Long])
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ columnVectors(i).appendFloat(values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ columnVectors(i).appendDouble(values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ columnVectors(i).appendBoolean(values(i).asInstanceOf[Boolean])
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ new ColumnarBatch(columnVectors.asInstanceOf[Array[ColumnVector]], 1)
+ }
+
+ /**
+ * Calculate the pushed down Aggregates (Max/Min/Count) result using the statistics
+ * information from parquet footer file.
+ *
+ * @return A tuple of `Array[PrimitiveType.PrimitiveTypeName]` and Array[Any].
+ * The first element is the PrimitiveTypeName of the Aggregate column,
+ * and the second element is the aggregated value.
+ */
+ private[sql] def getPushedDownAggResult(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ isCaseSensitive: Boolean)
+ : (Array[PrimitiveType.PrimitiveTypeName], Array[Any]) = {
+ val footerFileMetaData = footer.getFileMetaData
+ val fields = footerFileMetaData.getSchema.getFields
+ val blocks = footer.getBlocks()
+ val typesBuilder = ArrayBuilder.make[PrimitiveType.PrimitiveTypeName]
+ val valuesBuilder = ArrayBuilder.make[Any]
+
+ aggregation.aggregateExpressions().foreach { agg =>
+ var value: Any = None
+ var rowCount = 0L
+ var isCount = false
+ var index = 0
+ blocks.forEach { block =>
+ val blockMetaData = block.getColumns()
+ agg match {
+ case max: Max =>
+ index = dataSchema.fieldNames.toList.indexOf(max.column.fieldNames.head)
Review comment:
Should we handle case sensitivity here?
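One way the lookup could honor `spark.sql.caseSensitive` is sketched below. This is only an illustration against the schema API, not the PR author's fix, and `aggColumnIndex` is a made-up helper name:

```scala
import org.apache.spark.sql.types.StructType

// Hypothetical helper (not in the PR): resolve the pushed-down column name against
// the data schema, falling back to a case-insensitive match when isCaseSensitive is
// false, instead of the exact-match indexOf used above.
def aggColumnIndex(dataSchema: StructType, colName: String, isCaseSensitive: Boolean): Int = {
  if (isCaseSensitive) {
    dataSchema.fieldNames.indexOf(colName)
  } else {
    dataSchema.fieldNames.indexWhere(_.equalsIgnoreCase(colName))
  }
}
```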
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
+ aggregation.aggregateExpressions().foreach { agg =>
+ var value: Any = None
+ var rowCount = 0L
+ var isCount = false
+ var index = 0
+ blocks.forEach { block =>
+ val blockMetaData = block.getColumns()
+ agg match {
+ case max: Max =>
+ index = dataSchema.fieldNames.toList.indexOf(max.column.fieldNames.head)
+ val currentMax = getCurrentBlockMaxOrMin(blockMetaData, index, true)
+ if (currentMax != None &&
Review comment:
Hmm, in which situation will `currentMax` be `None`?
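For context, Parquet column statistics can legitimately be absent for a row group (e.g. the writer did not record them), which is presumably the case the `None` check guards against. A sketch against the parquet-mr `Statistics` API, not the PR's `getCurrentBlockMaxOrMin`, whose body is not quoted here:

```scala
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData

// Sketch only: if a row group carries no usable statistics for the column,
// there is no per-block max to compare against.
def blockMax(column: ColumnChunkMetaData): Option[Any] = {
  val stats = column.getStatistics
  if (stats == null || stats.isEmpty || !stats.hasNonNullValue) {
    None // statistics absent, or the column chunk holds only nulls
  } else {
    Some(stats.genericGetMax())
  }
}
```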
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet, we don't need to
Review comment:
nit: let's use "Parquet" with capital 'P', "Spark" with capital "S".
Also why `Aggregates` has capital 'A'?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
+ aggregation.aggregateExpressions().foreach { agg =>
+ var value: Any = None
+ var rowCount = 0L
+ var isCount = false
+ var index = 0
+ blocks.forEach { block =>
+ val blockMetaData = block.getColumns()
+ agg match {
+ case max: Max =>
+ index = dataSchema.fieldNames.toList.indexOf(max.column.fieldNames.head)
+ val currentMax = getCurrentBlockMaxOrMin(blockMetaData, index, true)
+ if (currentMax != None &&
+ (value == None || currentMax.asInstanceOf[Comparable[Any]].compareTo(value) > 0)) {
+ value = currentMax
+ }
+ case min: Min =>
+ index = dataSchema.fieldNames.toList.indexOf(min.column.fieldNames.head)
+ val currentMin = getCurrentBlockMaxOrMin(blockMetaData, index, false)
+ if (currentMin != None &&
+ (value == None || currentMin.asInstanceOf[Comparable[Any]].compareTo(value) < 0)) {
+ value = currentMin
+ }
+ case count: Count =>
+ rowCount += block.getRowCount
+ var isPartitionCol = false;
Review comment:
nit: remove `;`
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
+ case count: Count =>
+ rowCount += block.getRowCount
Review comment:
nit: `rowCount = block.getRowCount`? it's easier to understand.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
+ case count: Count =>
+ rowCount += block.getRowCount
+ var isPartitionCol = false;
+ if (partitionSchema.fields.map(PartitioningUtils.getColName(_, isCaseSensitive))
+ .toSet.contains(count.column().fieldNames.head)) {
+ isPartitionCol = true
+ }
+ isCount = true
+ if(!isPartitionCol) {
+ index = dataSchema.fieldNames.toList.indexOf(count.column.fieldNames.head)
+ // Count(*) includes the null values, but Count (colName) doesn't.
+ rowCount -= getNumNulls(blockMetaData, index)
+ }
+ case _: CountStar =>
+ rowCount += block.getRowCount
Review comment:
ditto
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
+ /**
+ * When the Aggregates (Max/Min/Count) are pushed down to parquet, in the case of
+ * PARQUET_VECTORIZED_READER_ENABLED sets to true, we don't need buildColumnarReader
+ * to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the Aggregates (Max/Min/Count) result using the statistics information
+ * from parquet footer file, and then construct a ColumnarBatch from these Aggregate results.
+ *
+ * @return Aggregate results in the format of ColumnarBatch
+ */
+ private[sql] def createAggColumnarBatchFromFooter(
Review comment:
maybe we can just create a row first and then convert to `ColumnarBatch` using `RowToColumnConverter`?
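A rough sketch of that suggestion, assuming the row has already been built by `createAggInternalRowFromFooter` and that `RowToColumnConverter` (internal to `org.apache.spark.sql.execution`) is reachable from this code; the helper name is made up for illustration:

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.RowToColumnConverter
import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.vectorized.{ColumnVector, ColumnarBatch}

// Hypothetical helper: wrap a single aggregate row into a one-row ColumnarBatch,
// letting RowToColumnConverter do the per-type writes instead of duplicating the
// type dispatch in createAggColumnarBatchFromFooter.
def aggRowToColumnarBatch(row: InternalRow, aggSchema: StructType): ColumnarBatch = {
  val converter = new RowToColumnConverter(aggSchema)
  val vectors: Seq[WritableColumnVector] = OnHeapColumnVector.allocateColumns(1, aggSchema)
  val vectorArray = vectors.toArray
  converter.convert(row, vectorArray)
  new ColumnarBatch(vectorArray.asInstanceOf[Array[ColumnVector]], 1)
}
```

The off-heap path could presumably be kept by swapping in `OffHeapColumnVector.allocateColumns` when `offHeap` is set.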
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
##########
@@ -870,6 +870,13 @@ object SQLConf {
.checkValue(threshold => threshold >= 0, "The threshold must not be negative.")
.createWithDefault(10)
+ val PARQUET_AGGREGATE_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.aggregatePushdown")
+ .doc("If true, MAX/MIN/COUNT without filter and group by will be pushed" +
+ " down to parquet for optimization. ")
Review comment:
nit: "down to Parquet data source for optimization"?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet,
we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark
layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct an InternalRow from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x =>
x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ mutableRow.setByte(i, values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+ mutableRow.setShort(i, values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ mutableRow.setInt(i, values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+ mutableRow.update(i,
dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Integer].toLong,
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ aggSchema.fields(i).dataType match {
+ case LongType =>
+ mutableRow.setLong(i, values(i).asInstanceOf[Long])
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Long], d.precision,
d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT64")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ mutableRow.setFloat(i, values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ mutableRow.setDouble(i, values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ mutableRow.setBoolean(i, values(i).asInstanceOf[Boolean])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case StringType =>
+ mutableRow.update(i, UTF8String.fromBytes(bytes))
+ case BinaryType =>
+ mutableRow.update(i, bytes)
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for Binary")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for FIXED_LEN_BYTE_ARRAY")
+ }
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ mutableRow
+ }
+
+ /**
+ * When the Aggregates (Max/Min/Count) are pushed down to parquet, in the
case of
+ * PARQUET_VECTORIZED_READER_ENABLED sets to true, we don't need
buildColumnarReader
+ * to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct a ColumnarBatch from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of ColumnarBatch
+ */
+ private[sql] def createAggColumnarBatchFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ offHeap: Boolean,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): ColumnarBatch = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val capacity = 4 * 1024
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+ val columnVectors = if (offHeap) {
+ OffHeapColumnVector.allocateColumns(capacity, aggSchema)
+ } else {
+ OnHeapColumnVector.allocateColumns(capacity, aggSchema)
+ }
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ columnVectors(i).appendByte(values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+
columnVectors(i).appendShort(values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ columnVectors(i).appendInt(values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+
columnVectors(i).appendInt(dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ s" for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ columnVectors(i).appendLong(values(i).asInstanceOf[Long])
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ columnVectors(i).appendFloat(values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ columnVectors(i).appendDouble(values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ columnVectors(i).appendBoolean(values(i).asInstanceOf[Boolean])
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ new ColumnarBatch(columnVectors.asInstanceOf[Array[ColumnVector]], 1)
+ }
+
+ /**
+ * Calculate the pushed down Aggregates (Max/Min/Count) result using the statistics
+ * information from parquet footer file.
+ *
+ * @return A tuple of `Array[PrimitiveType.PrimitiveTypeName]` and Array[Any].
+ * The first element is the PrimitiveTypeName of the Aggregate column,
+ * and the second element is the aggregated value.
+ */
+ private[sql] def getPushedDownAggResult(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ isCaseSensitive: Boolean)
+ : (Array[PrimitiveType.PrimitiveTypeName], Array[Any]) = {
+ val footerFileMetaData = footer.getFileMetaData
+ val fields = footerFileMetaData.getSchema.getFields
+ val blocks = footer.getBlocks()
+ val typesBuilder = ArrayBuilder.make[PrimitiveType.PrimitiveTypeName]
+ val valuesBuilder = ArrayBuilder.make[Any]
+
+ aggregation.aggregateExpressions().foreach { agg =>
+ var value: Any = None
+ var rowCount = 0L
+ var isCount = false
+ var index = 0
+ blocks.forEach { block =>
+ val blockMetaData = block.getColumns()
+ agg match {
+ case max: Max =>
+ index = dataSchema.fieldNames.toList.indexOf(max.column.fieldNames.head)
+ val currentMax = getCurrentBlockMaxOrMin(blockMetaData, index, true)
+ if (currentMax != None &&
+ (value == None || currentMax.asInstanceOf[Comparable[Any]].compareTo(value) > 0)) {
+ value = currentMax
+ }
+ case min: Min =>
+ index = dataSchema.fieldNames.toList.indexOf(min.column.fieldNames.head)
+ val currentMin = getCurrentBlockMaxOrMin(blockMetaData, index, false)
+ if (currentMin != None &&
+ (value == None || currentMin.asInstanceOf[Comparable[Any]].compareTo(value) < 0)) {
+ value = currentMin
+ }
+ case count: Count =>
+ rowCount += block.getRowCount
+ var isPartitionCol = false;
+ if (partitionSchema.fields.map(PartitioningUtils.getColName(_, isCaseSensitive))
+ .toSet.contains(count.column().fieldNames.head)) {
+ isPartitionCol = true
+ }
+ isCount = true
+ if(!isPartitionCol) {
+ index = dataSchema.fieldNames.toList.indexOf(count.column.fieldNames.head)
+ // Count(*) includes the null values, but Count (colName) doesn't.
+ rowCount -= getNumNulls(blockMetaData, index)
+ }
+ case _: CountStar =>
+ rowCount += block.getRowCount
+ isCount = true
+ case _ =>
Review comment:
what about `Sum` - will it ever get here?
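For reference, a minimal sketch of the guard I have in mind (a hypothetical helper, not code from this PR; it reuses the connector aggregate classes already imported in ParquetUtils.scala): row-group statistics only carry min/max/row-count/null-count, so `Sum` cannot be computed here and should be filtered out at pushdown time instead of ever reaching this match.

import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count, CountStar, Max, Min}

// Hypothetical pre-check: only aggregates answerable from footer statistics
// are allowed through; anything else (e.g. Sum, Avg) falls back to a normal scan.
object FooterAggSupport {
  def supportedByFooterStats(aggregation: Aggregation): Boolean =
    aggregation.aggregateExpressions().forall {
      case _: Max | _: Min | _: Count | _: CountStar => true
      case _ => false
    }
}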
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala
##########
@@ -80,43 +84,90 @@ case class ParquetPartitionReaderFactory(
private val datetimeRebaseModeInRead = parquetOptions.datetimeRebaseModeInRead
private val int96RebaseModeInRead = parquetOptions.int96RebaseModeInRead
+ private def getFooter(file: PartitionedFile): ParquetMetadata = {
+ val conf = broadcastedConf.value.value
+ val filePath = new Path(new URI(file.filePath))
+
+ if (aggregation.isEmpty) {
+ ParquetFooterReader.readFooter(conf, filePath, SKIP_ROW_GROUPS)
+ } else {
+ ParquetFooterReader.readFooter(conf, filePath, NO_FILTER)
+ }
+ }
+
override def supportColumnarReads(partition: InputPartition): Boolean = {
sqlConf.parquetVectorizedReaderEnabled && sqlConf.wholeStageEnabled &&
resultSchema.length <= sqlConf.wholeStageMaxNumFields &&
resultSchema.forall(_.dataType.isInstanceOf[AtomicType])
}
override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = {
- val reader = if (enableVectorizedReader) {
- createVectorizedReader(file)
+ val fileReader = if (aggregation.isEmpty) {
+ val reader = if (enableVectorizedReader) {
+ createVectorizedReader(file)
+ } else {
+ createRowBaseReader(file)
+ }
+
+ new PartitionReader[InternalRow] {
+ override def next(): Boolean = reader.nextKeyValue()
+
+ override def get(): InternalRow = reader.getCurrentValue.asInstanceOf[InternalRow]
+
+ override def close(): Unit = reader.close()
+ }
} else {
- createRowBaseReader(file)
- }
+ new PartitionReader[InternalRow] {
+ var count = 0
- val fileReader = new PartitionReader[InternalRow] {
- override def next(): Boolean = reader.nextKeyValue()
+ override def next(): Boolean = if (count == 0) true else false
- override def get(): InternalRow = reader.getCurrentValue.asInstanceOf[InternalRow]
+ override def get(): InternalRow = {
+ count += 1
+ val footer = getFooter(file)
+ ParquetUtils.createAggInternalRowFromFooter(footer, dataSchema, partitionSchema,
+ aggregation.get, readDataSchema, datetimeRebaseModeInRead, isCaseSensitive)
+ }
- override def close(): Unit = reader.close()
+ override def close(): Unit = return
+ }
}
new PartitionReaderWithPartitionValues(fileReader, readDataSchema,
partitionSchema, file.partitionValues)
}
override def buildColumnarReader(file: PartitionedFile): PartitionReader[ColumnarBatch] = {
- val vectorizedReader = createVectorizedReader(file)
- vectorizedReader.enableReturningBatches()
+ val fileReader = if (aggregation.isEmpty) {
+ val vectorizedReader = createVectorizedReader(file)
+ vectorizedReader.enableReturningBatches()
+
+ new PartitionReader[ColumnarBatch] {
+ override def next(): Boolean = vectorizedReader.nextKeyValue()
- new PartitionReader[ColumnarBatch] {
- override def next(): Boolean = vectorizedReader.nextKeyValue()
+ override def get(): ColumnarBatch =
+ vectorizedReader.getCurrentValue.asInstanceOf[ColumnarBatch]
- override def get(): ColumnarBatch =
- vectorizedReader.getCurrentValue.asInstanceOf[ColumnarBatch]
+ override def close(): Unit = vectorizedReader.close()
+ }
+ } else {
+ new PartitionReader[ColumnarBatch] {
+ var count = 0
+
+ override def next(): Boolean = if (count == 0) true else false
- override def close(): Unit = vectorizedReader.close()
+ override def get(): ColumnarBatch = {
+ count += 1
+ val footer = getFooter(file)
+ ParquetUtils.createAggColumnarBatchFromFooter(footer, dataSchema, partitionSchema,
+ aggregation.get, readDataSchema, enableOffHeapColumnVector, datetimeRebaseModeInRead,
+ isCaseSensitive)
+ }
+
+ override def close(): Unit = return
Review comment:
ditto
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala
##########
@@ -80,43 +84,90 @@ case class ParquetPartitionReaderFactory(
private val datetimeRebaseModeInRead = parquetOptions.datetimeRebaseModeInRead
private val int96RebaseModeInRead = parquetOptions.int96RebaseModeInRead
+ private def getFooter(file: PartitionedFile): ParquetMetadata = {
+ val conf = broadcastedConf.value.value
+ val filePath = new Path(new URI(file.filePath))
+
+ if (aggregation.isEmpty) {
+ ParquetFooterReader.readFooter(conf, filePath, SKIP_ROW_GROUPS)
+ } else {
+ ParquetFooterReader.readFooter(conf, filePath, NO_FILTER)
+ }
+ }
+
override def supportColumnarReads(partition: InputPartition): Boolean = {
sqlConf.parquetVectorizedReaderEnabled && sqlConf.wholeStageEnabled &&
resultSchema.length <= sqlConf.wholeStageMaxNumFields &&
resultSchema.forall(_.dataType.isInstanceOf[AtomicType])
}
override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = {
- val reader = if (enableVectorizedReader) {
- createVectorizedReader(file)
+ val fileReader = if (aggregation.isEmpty) {
+ val reader = if (enableVectorizedReader) {
+ createVectorizedReader(file)
+ } else {
+ createRowBaseReader(file)
+ }
+
+ new PartitionReader[InternalRow] {
+ override def next(): Boolean = reader.nextKeyValue()
+
+ override def get(): InternalRow = reader.getCurrentValue.asInstanceOf[InternalRow]
+
+ override def close(): Unit = reader.close()
+ }
} else {
- createRowBaseReader(file)
- }
+ new PartitionReader[InternalRow] {
+ var count = 0
- val fileReader = new PartitionReader[InternalRow] {
- override def next(): Boolean = reader.nextKeyValue()
+ override def next(): Boolean = if (count == 0) true else false
- override def get(): InternalRow = reader.getCurrentValue.asInstanceOf[InternalRow]
+ override def get(): InternalRow = {
+ count += 1
+ val footer = getFooter(file)
+ ParquetUtils.createAggInternalRowFromFooter(footer, dataSchema, partitionSchema,
+ aggregation.get, readDataSchema, datetimeRebaseModeInRead, isCaseSensitive)
+ }
- override def close(): Unit = reader.close()
+ override def close(): Unit = return
Review comment:
nit: change `return` to `{}`
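That is, an explicit no-op body:

override def close(): Unit = {}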
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet, we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics information
+ * from parquet footer file, and then construct an InternalRow from these Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation, isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x => x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
Review comment:
I wonder whether we could leverage `ParquetRowConverter` to do the job.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
##########
@@ -585,8 +585,8 @@ private[sql] object ParquetSchemaConverter {
Types.buildMessage().named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME)
def checkFieldName(name: String): Unit = {
- // ,;{}()\n\t= and space are special characters in Parquet schema
- if (name.matches(".*[ ,;{}()\n\t=].*")) {
+ // ,;{}\n\t= and space are special characters in Parquet schema
+ if (name.matches(".*[ ,;{}\n\t=].*")) {
Review comment:
This is a field name though - why should we use `Max(col)` or `Min(col)` as a field name? It looks weird.
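To make the concern concrete (just an illustration of the two patterns in this hunk; presumably the change exists so that generated names like `Max(col)` pass the check):

// The two regexes from this diff, applied to a generated aggregate column name.
val withParens    = ".*[ ,;{}()\n\t=].*"
val withoutParens = ".*[ ,;{}\n\t=].*"
val name = "Max(col)"
name.matches(withParens)     // true  -> rejected by the current check
name.matches(withoutParens)  // false -> accepted after this change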
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala
##########
@@ -80,43 +84,90 @@ case class ParquetPartitionReaderFactory(
private val datetimeRebaseModeInRead =
parquetOptions.datetimeRebaseModeInRead
private val int96RebaseModeInRead = parquetOptions.int96RebaseModeInRead
+ private def getFooter(file: PartitionedFile): ParquetMetadata = {
+ val conf = broadcastedConf.value.value
+ val filePath = new Path(new URI(file.filePath))
+
+ if (aggregation.isEmpty) {
+ ParquetFooterReader.readFooter(conf, filePath, SKIP_ROW_GROUPS)
+ } else {
+ ParquetFooterReader.readFooter(conf, filePath, NO_FILTER)
+ }
+ }
+
override def supportColumnarReads(partition: InputPartition): Boolean = {
sqlConf.parquetVectorizedReaderEnabled && sqlConf.wholeStageEnabled &&
resultSchema.length <= sqlConf.wholeStageMaxNumFields &&
resultSchema.forall(_.dataType.isInstanceOf[AtomicType])
}
override def buildReader(file: PartitionedFile):
PartitionReader[InternalRow] = {
- val reader = if (enableVectorizedReader) {
- createVectorizedReader(file)
+ val fileReader = if (aggregation.isEmpty) {
+ val reader = if (enableVectorizedReader) {
+ createVectorizedReader(file)
+ } else {
+ createRowBaseReader(file)
+ }
+
+ new PartitionReader[InternalRow] {
+ override def next(): Boolean = reader.nextKeyValue()
+
+ override def get(): InternalRow =
reader.getCurrentValue.asInstanceOf[InternalRow]
+
+ override def close(): Unit = reader.close()
+ }
} else {
- createRowBaseReader(file)
- }
+ new PartitionReader[InternalRow] {
+ var count = 0
- val fileReader = new PartitionReader[InternalRow] {
- override def next(): Boolean = reader.nextKeyValue()
+ override def next(): Boolean = if (count == 0) true else false
- override def get(): InternalRow =
reader.getCurrentValue.asInstanceOf[InternalRow]
+ override def get(): InternalRow = {
+ count += 1
+ val footer = getFooter(file)
+ ParquetUtils.createAggInternalRowFromFooter(footer, dataSchema,
partitionSchema,
+ aggregation.get, readDataSchema, datetimeRebaseModeInRead,
isCaseSensitive)
+ }
- override def close(): Unit = reader.close()
+ override def close(): Unit = return
+ }
}
new PartitionReaderWithPartitionValues(fileReader, readDataSchema,
partitionSchema, file.partitionValues)
}
override def buildColumnarReader(file: PartitionedFile):
PartitionReader[ColumnarBatch] = {
- val vectorizedReader = createVectorizedReader(file)
- vectorizedReader.enableReturningBatches()
+ val fileReader = if (aggregation.isEmpty) {
+ val vectorizedReader = createVectorizedReader(file)
+ vectorizedReader.enableReturningBatches()
+
+ new PartitionReader[ColumnarBatch] {
+ override def next(): Boolean = vectorizedReader.nextKeyValue()
- new PartitionReader[ColumnarBatch] {
- override def next(): Boolean = vectorizedReader.nextKeyValue()
+ override def get(): ColumnarBatch =
+ vectorizedReader.getCurrentValue.asInstanceOf[ColumnarBatch]
- override def get(): ColumnarBatch =
- vectorizedReader.getCurrentValue.asInstanceOf[ColumnarBatch]
+ override def close(): Unit = vectorizedReader.close()
+ }
+ } else {
+ new PartitionReader[ColumnarBatch] {
+ var count = 0
+
+ override def next(): Boolean = if (count == 0) true else false
- override def close(): Unit = vectorizedReader.close()
+ override def get(): ColumnarBatch = {
+ count += 1
Review comment:
ditto
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet,
we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark
layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct an InternalRow from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x =>
x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ mutableRow.setByte(i, values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+ mutableRow.setShort(i, values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ mutableRow.setInt(i, values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+ mutableRow.update(i,
dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Integer].toLong,
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ aggSchema.fields(i).dataType match {
+ case LongType =>
+ mutableRow.setLong(i, values(i).asInstanceOf[Long])
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Long], d.precision,
d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT64")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ mutableRow.setFloat(i, values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ mutableRow.setDouble(i, values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ mutableRow.setBoolean(i, values(i).asInstanceOf[Boolean])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case StringType =>
+ mutableRow.update(i, UTF8String.fromBytes(bytes))
+ case BinaryType =>
+ mutableRow.update(i, bytes)
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for Binary")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for FIXED_LEN_BYTE_ARRAY")
+ }
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ mutableRow
+ }
+
+ /**
+ * When the Aggregates (Max/Min/Count) are pushed down to parquet, in the
case of
+ * PARQUET_VECTORIZED_READER_ENABLED sets to true, we don't need
buildColumnarReader
+ * to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct a ColumnarBatch from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of ColumnarBatch
+ */
+ private[sql] def createAggColumnarBatchFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ offHeap: Boolean,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): ColumnarBatch = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val capacity = 4 * 1024
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+ val columnVectors = if (offHeap) {
+ OffHeapColumnVector.allocateColumns(capacity, aggSchema)
+ } else {
+ OnHeapColumnVector.allocateColumns(capacity, aggSchema)
+ }
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ columnVectors(i).appendByte(values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+
columnVectors(i).appendShort(values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ columnVectors(i).appendInt(values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+
columnVectors(i).appendInt(dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ s" for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ columnVectors(i).appendLong(values(i).asInstanceOf[Long])
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ columnVectors(i).appendFloat(values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ columnVectors(i).appendDouble(values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ columnVectors(i).appendBoolean(values(i).asInstanceOf[Boolean])
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ new ColumnarBatch(columnVectors.asInstanceOf[Array[ColumnVector]], 1)
+ }
+
+ /**
+ * Calculate the pushed down Aggregates (Max/Min/Count) result using the
statistics
+ * information from parquet footer file.
+ *
+ * @return A tuple of `Array[PrimitiveType.PrimitiveTypeName]` and
Array[Any].
+ * The first element is the PrimitiveTypeName of the Aggregate
column,
+ * and the second element is the aggregated value.
+ */
+ private[sql] def getPushedDownAggResult(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ isCaseSensitive: Boolean)
+ : (Array[PrimitiveType.PrimitiveTypeName], Array[Any]) = {
+ val footerFileMetaData = footer.getFileMetaData
+ val fields = footerFileMetaData.getSchema.getFields
+ val blocks = footer.getBlocks()
+ val typesBuilder = ArrayBuilder.make[PrimitiveType.PrimitiveTypeName]
+ val valuesBuilder = ArrayBuilder.make[Any]
+
+ aggregation.aggregateExpressions().foreach { agg =>
+ var value: Any = None
+ var rowCount = 0L
+ var isCount = false
+ var index = 0
+ blocks.forEach { block =>
+ val blockMetaData = block.getColumns()
+ agg match {
+ case max: Max =>
+ index = dataSchema.fieldNames.toList.indexOf(max.column.fieldNames.head)
+ val currentMax = getCurrentBlockMaxOrMin(blockMetaData, index, true)
+ if (currentMax != None &&
+ (value == None || currentMax.asInstanceOf[Comparable[Any]].compareTo(value) > 0)) {
+ value = currentMax
+ }
+ case min: Min =>
+ index = dataSchema.fieldNames.toList.indexOf(min.column.fieldNames.head)
Review comment:
seems this won't work for complex types (e.g., `SELECT MIN(col.f1.f2) FROM table`)?
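To illustrate (a standalone sketch with made-up names, not code from this PR): for a nested reference the `fieldNames` array has more than one element, and looking up only the head against the top-level `dataSchema.fieldNames` resolves the struct column rather than the leaf whose statistics are needed.

// MIN(col.f1.f2) arrives as fieldNames = Array("col", "f1", "f2").
val fieldNames = Array("col", "f1", "f2")
val topLevelNames = Array("id", "col")                      // stand-in for dataSchema.fieldNames
val index = topLevelNames.toList.indexOf(fieldNames.head)   // 1 -> the struct "col", not the leaf "f2"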
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet,
we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark
layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct an InternalRow from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x =>
x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ mutableRow.setByte(i, values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+ mutableRow.setShort(i, values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ mutableRow.setInt(i, values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+ mutableRow.update(i,
dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Integer].toLong,
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ aggSchema.fields(i).dataType match {
+ case LongType =>
+ mutableRow.setLong(i, values(i).asInstanceOf[Long])
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Long], d.precision,
d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT64")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ mutableRow.setFloat(i, values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ mutableRow.setDouble(i, values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ mutableRow.setBoolean(i, values(i).asInstanceOf[Boolean])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case StringType =>
+ mutableRow.update(i, UTF8String.fromBytes(bytes))
+ case BinaryType =>
+ mutableRow.update(i, bytes)
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for Binary")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for FIXED_LEN_BYTE_ARRAY")
+ }
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ mutableRow
+ }
+
+ /**
+ * When the Aggregates (Max/Min/Count) are pushed down to parquet, in the
case of
+ * PARQUET_VECTORIZED_READER_ENABLED sets to true, we don't need
buildColumnarReader
+ * to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct a ColumnarBatch from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of ColumnarBatch
+ */
+ private[sql] def createAggColumnarBatchFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ offHeap: Boolean,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): ColumnarBatch = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val capacity = 4 * 1024
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+ val columnVectors = if (offHeap) {
+ OffHeapColumnVector.allocateColumns(capacity, aggSchema)
+ } else {
+ OnHeapColumnVector.allocateColumns(capacity, aggSchema)
+ }
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ columnVectors(i).appendByte(values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+
columnVectors(i).appendShort(values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ columnVectors(i).appendInt(values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+
columnVectors(i).appendInt(dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ s" for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ columnVectors(i).appendLong(values(i).asInstanceOf[Long])
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ columnVectors(i).appendFloat(values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ columnVectors(i).appendDouble(values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ columnVectors(i).putByteArray(0, bytes, 0, bytes.length)
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ columnVectors(i).appendBoolean(values(i).asInstanceOf[Boolean])
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ new ColumnarBatch(columnVectors.asInstanceOf[Array[ColumnVector]], 1)
+ }
+
+ /**
+ * Calculate the pushed down Aggregates (Max/Min/Count) result using the
statistics
+ * information from parquet footer file.
+ *
+ * @return A tuple of `Array[PrimitiveType.PrimitiveTypeName]` and
Array[Any].
+ * The first element is the PrimitiveTypeName of the Aggregate
column,
+ * and the second element is the aggregated value.
+ */
+ private[sql] def getPushedDownAggResult(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ isCaseSensitive: Boolean)
+ : (Array[PrimitiveType.PrimitiveTypeName], Array[Any]) = {
+ val footerFileMetaData = footer.getFileMetaData
+ val fields = footerFileMetaData.getSchema.getFields
+ val blocks = footer.getBlocks()
+ val typesBuilder = ArrayBuilder.make[PrimitiveType.PrimitiveTypeName]
+ val valuesBuilder = ArrayBuilder.make[Any]
+
+ aggregation.aggregateExpressions().foreach { agg =>
+ var value: Any = None
+ var rowCount = 0L
+ var isCount = false
+ var index = 0
+ blocks.forEach { block =>
+ val blockMetaData = block.getColumns()
+ agg match {
+ case max: Max =>
+ index = dataSchema.fieldNames.toList.indexOf(max.column.fieldNames.head)
+ val currentMax = getCurrentBlockMaxOrMin(blockMetaData, index, true)
+ if (currentMax != None &&
+ (value == None || currentMax.asInstanceOf[Comparable[Any]].compareTo(value) > 0)) {
+ value = currentMax
+ }
+ case min: Min =>
+ index = dataSchema.fieldNames.toList.indexOf(min.column.fieldNames.head)
+ val currentMin = getCurrentBlockMaxOrMin(blockMetaData, index, false)
+ if (currentMin != None &&
+ (value == None || currentMin.asInstanceOf[Comparable[Any]].compareTo(value) < 0)) {
+ value = currentMin
+ }
+ case count: Count =>
+ rowCount += block.getRowCount
+ var isPartitionCol = false;
+ if (partitionSchema.fields.map(PartitioningUtils.getColName(_, isCaseSensitive))
+ .toSet.contains(count.column().fieldNames.head)) {
+ isPartitionCol = true
+ }
+ isCount = true
+ if(!isPartitionCol) {
+ index = dataSchema.fieldNames.toList.indexOf(count.column.fieldNames.head)
+ // Count(*) includes the null values, but Count (colName) doesn't.
+ rowCount -= getNumNulls(blockMetaData, index)
Review comment:
what about `COUNT(DISTINCT)`?
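For completeness, the kind of pre-check I'd expect (hypothetical, and it assumes `Count.isDistinct()` from the DS v2 aggregate API): a distinct count cannot be derived from row counts and null counts alone, so it must not be pushed down to this path.

import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count}

// Hypothetical guard: reject COUNT(DISTINCT col) before taking the footer-statistics path.
def containsDistinctCount(aggregation: Aggregation): Boolean =
  aggregation.aggregateExpressions().exists {
    case c: Count => c.isDistinct()
    case _ => false
  }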
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala
##########
@@ -80,43 +84,90 @@ case class ParquetPartitionReaderFactory(
private val datetimeRebaseModeInRead =
parquetOptions.datetimeRebaseModeInRead
private val int96RebaseModeInRead = parquetOptions.int96RebaseModeInRead
+ private def getFooter(file: PartitionedFile): ParquetMetadata = {
+ val conf = broadcastedConf.value.value
+ val filePath = new Path(new URI(file.filePath))
+
+ if (aggregation.isEmpty) {
+ ParquetFooterReader.readFooter(conf, filePath, SKIP_ROW_GROUPS)
+ } else {
+ ParquetFooterReader.readFooter(conf, filePath, NO_FILTER)
+ }
+ }
+
override def supportColumnarReads(partition: InputPartition): Boolean = {
sqlConf.parquetVectorizedReaderEnabled && sqlConf.wholeStageEnabled &&
resultSchema.length <= sqlConf.wholeStageMaxNumFields &&
resultSchema.forall(_.dataType.isInstanceOf[AtomicType])
}
override def buildReader(file: PartitionedFile):
PartitionReader[InternalRow] = {
- val reader = if (enableVectorizedReader) {
- createVectorizedReader(file)
+ val fileReader = if (aggregation.isEmpty) {
+ val reader = if (enableVectorizedReader) {
+ createVectorizedReader(file)
+ } else {
+ createRowBaseReader(file)
+ }
+
+ new PartitionReader[InternalRow] {
+ override def next(): Boolean = reader.nextKeyValue()
+
+ override def get(): InternalRow = reader.getCurrentValue.asInstanceOf[InternalRow]
+
+ override def close(): Unit = reader.close()
+ }
} else {
- createRowBaseReader(file)
- }
+ new PartitionReader[InternalRow] {
+ var count = 0
- val fileReader = new PartitionReader[InternalRow] {
- override def next(): Boolean = reader.nextKeyValue()
+ override def next(): Boolean = if (count == 0) true else false
- override def get(): InternalRow = reader.getCurrentValue.asInstanceOf[InternalRow]
+ override def get(): InternalRow = {
+ count += 1
Review comment:
nit: can we use a boolean flag instead?
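Something like this is what I had in mind (a sketch only, reusing the fields and helpers from this diff; `hasReturned` is an illustrative name):

// The reader returns exactly one row, so a Boolean flag reads more clearly than an Int counter.
new PartitionReader[InternalRow] {
  private var hasReturned = false

  override def next(): Boolean = !hasReturned

  override def get(): InternalRow = {
    hasReturned = true
    val footer = getFooter(file)
    ParquetUtils.createAggInternalRowFromFooter(footer, dataSchema, partitionSchema,
      aggregation.get, readDataSchema, datetimeRebaseModeInRead, isCaseSensitive)
  }

  override def close(): Unit = {}
}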
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala
##########
@@ -127,4 +145,260 @@ object ParquetUtils {
file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE ||
file.getName == ParquetFileWriter.PARQUET_METADATA_FILE
}
+
+ /**
+ * When the partial Aggregates (Max/Min/Count) are pushed down to parquet,
we don't need to
+ * createRowBaseReader to read data from parquet and aggregate at spark
layer. Instead we want
+ * to get the partial Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct an InternalRow from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of InternalRow
+ */
+ private[sql] def createAggInternalRowFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): InternalRow = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val mutableRow = new SpecificInternalRow(aggSchema.fields.map(x =>
x.dataType))
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
+ aggSchema.fields(i).dataType match {
+ case ByteType =>
+ mutableRow.setByte(i, values(i).asInstanceOf[Integer].toByte)
+ case ShortType =>
+ mutableRow.setShort(i, values(i).asInstanceOf[Integer].toShort)
+ case IntegerType =>
+ mutableRow.setInt(i, values(i).asInstanceOf[Integer])
+ case DateType =>
+ val dateRebaseFunc = DataSourceUtils.creteDateRebaseFuncInRead(
+ datetimeRebaseMode, "Parquet")
+ mutableRow.update(i,
dateRebaseFunc(values(i).asInstanceOf[Integer]))
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Integer].toLong,
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT32")
+ }
+ case (PrimitiveType.PrimitiveTypeName.INT64, i) =>
+ aggSchema.fields(i).dataType match {
+ case LongType =>
+ mutableRow.setLong(i, values(i).asInstanceOf[Long])
+ case d: DecimalType =>
+ val decimal = Decimal(values(i).asInstanceOf[Long], d.precision,
d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for INT64")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FLOAT, i) =>
+ mutableRow.setFloat(i, values(i).asInstanceOf[Float])
+ case (PrimitiveType.PrimitiveTypeName.DOUBLE, i) =>
+ mutableRow.setDouble(i, values(i).asInstanceOf[Double])
+ case (PrimitiveType.PrimitiveTypeName.BOOLEAN, i) =>
+ mutableRow.setBoolean(i, values(i).asInstanceOf[Boolean])
+ case (PrimitiveType.PrimitiveTypeName.BINARY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case StringType =>
+ mutableRow.update(i, UTF8String.fromBytes(bytes))
+ case BinaryType =>
+ mutableRow.update(i, bytes)
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for Binary")
+ }
+ case (PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, i) =>
+ val bytes = values(i).asInstanceOf[Binary].getBytes
+ aggSchema.fields(i).dataType match {
+ case d: DecimalType =>
+ val decimal =
+ Decimal(new BigDecimal(new BigInteger(bytes), d.scale),
d.precision, d.scale)
+ mutableRow.setDecimal(i, decimal, d.precision)
+ case _ => throw new SparkException(s"Unexpected type
${aggSchema.fields(i).dataType}" +
+ " for FIXED_LEN_BYTE_ARRAY")
+ }
+ case _ =>
+ throw new SparkException("Unexpected parquet type name")
+ }
+ mutableRow
+ }
+
+ /**
+ * When the Aggregates (Max/Min/Count) are pushed down to parquet, in the
case of
+ * PARQUET_VECTORIZED_READER_ENABLED sets to true, we don't need
buildColumnarReader
+ * to read data from parquet and aggregate at spark layer. Instead we want
+ * to get the Aggregates (Max/Min/Count) result using the statistics
information
+ * from parquet footer file, and then construct a ColumnarBatch from these
Aggregate results.
+ *
+ * @return Aggregate results in the format of ColumnarBatch
+ */
+ private[sql] def createAggColumnarBatchFromFooter(
+ footer: ParquetMetadata,
+ dataSchema: StructType,
+ partitionSchema: StructType,
+ aggregation: Aggregation,
+ aggSchema: StructType,
+ offHeap: Boolean,
+ datetimeRebaseModeInRead: String,
+ isCaseSensitive: Boolean): ColumnarBatch = {
+ val (parquetTypes, values) =
+ getPushedDownAggResult(footer, dataSchema, partitionSchema, aggregation,
isCaseSensitive)
+ val capacity = 4 * 1024
+ val footerFileMetaData = footer.getFileMetaData
+ val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode(
+ footerFileMetaData.getKeyValueMetaData.get, datetimeRebaseModeInRead)
+ val columnVectors = if (offHeap) {
+ OffHeapColumnVector.allocateColumns(capacity, aggSchema)
+ } else {
+ OnHeapColumnVector.allocateColumns(capacity, aggSchema)
+ }
+
+ parquetTypes.zipWithIndex.foreach {
+ case (PrimitiveType.PrimitiveTypeName.INT32, i) =>
Review comment:
we may also need to handle unsigned int32, int64 etc.
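For illustration only (plain JDK conversions; widening to LongType and Decimal(20, 0) is what I would expect for unsigned 32/64-bit values, but treat this as a sketch rather than the final handling):

import java.math.{BigDecimal, BigInteger}
import org.apache.spark.sql.types.Decimal

// An unsigned 32-bit statistic stored in an Int must be widened to Long,
// and an unsigned 64-bit statistic stored in a Long must be widened to Decimal(20, 0),
// otherwise large unsigned values come back negative.
val uint32Stat: Int = -1                               // bit pattern of 4294967295
val asLong: Long = Integer.toUnsignedLong(uint32Stat)  // 4294967295L

val uint64Stat: Long = -1L                             // bit pattern of 18446744073709551615
val asDecimal: Decimal =
  Decimal(new BigDecimal(new BigInteger(java.lang.Long.toUnsignedString(uint64Stat))), 20, 0)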
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]