This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3fa07bb841fd [SPARK-55305][SQL][TESTS] Use `ParquetFooterReader.readFooter` uniformly in test code to read the footer
3fa07bb841fd is described below
commit 3fa07bb841fd9bb6e9a4e6c0f17ae2dfa86d65ca
Author: yangjie01 <[email protected]>
AuthorDate: Mon Feb 2 16:01:25 2026 +0800
[SPARK-55305][SQL][TESTS] Use `ParquetFooterReader.readFooter` uniformly in test code to read the footer
### What changes were proposed in this pull request?
This PR changes the Spark test code to uniformly use `ParquetFooterReader.readFooter` when reading Parquet footers.
### Why are the changes needed?
This uses the helper method encapsulated by Spark and avoids the deprecated APIs in `ParquetFileReader`; specifically, `ParquetFileReader#readFooter(Configuration, Path)` has been deprecated. A sketch of the replacement pattern is shown below.
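
For illustration, a minimal sketch of the replacement pattern the tests now follow, assuming a `SparkSession` named `spark` and a Parquet file path in a `path` string (both come from the surrounding test, not from this change):

```scala
import scala.jdk.CollectionConverters._

import org.apache.hadoop.fs.Path
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile

import org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader

// Before (deprecated): ParquetFileReader.readFooter(conf, new Path(path))
// After: wrap the path in a HadoopInputFile and read the footer through
// Spark's ParquetFooterReader helper.
val conf = spark.sessionState.newHadoopConf()
val inputFile = HadoopInputFile.fromPath(new Path(path), conf)
val footer = ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)

// The returned ParquetMetadata is used the same way as before,
// e.g. to inspect per-row-group row counts.
val rowCounts = footer.getBlocks.asScala.map(_.getRowCount)
```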
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #54086 from LuciferYang/use-ParquetFooterReader-readFooter-in-tests.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../datasources/parquet/ParquetRowIndexSuite.scala | 7 +++++--
.../parquet/ParquetTypeWideningSuite.scala | 19 ++++++++++---------
.../org/apache/spark/sql/hive/HiveParquetSuite.scala | 8 +++++---
3 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala
index 08fd8a9ecb53..fd96e23b17c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala
@@ -22,8 +22,10 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.Path
import org.apache.parquet.column.ParquetProperties._
-import org.apache.parquet.hadoop.{ParquetFileReader, ParquetOutputFormat}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE
+import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.spark.SparkException
import org.apache.spark.sql.QueryTest
@@ -43,7 +45,8 @@ class ParquetRowIndexSuite extends QueryTest with SharedSparkSession {
  import testImplicits._

  private def readRowGroupRowCounts(path: String): Seq[Long] = {
-    ParquetFileReader.readFooter(spark.sessionState.newHadoopConf(), new Path(path))
+    val inputFile = HadoopInputFile.fromPath(new Path(path), spark.sessionState.newHadoopConf())
+    ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      .getBlocks.asScala.toSeq.map(_.getRowCount)
  }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala
index 09ed6955a516..2fc42f19743f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala
@@ -21,7 +21,8 @@ import java.io.File
import org.apache.hadoop.fs.Path
import org.apache.parquet.column.{Encoding, ParquetProperties}
import org.apache.parquet.format.converter.ParquetMetadataConverter
-import org.apache.parquet.hadoop.{ParquetFileReader, ParquetOutputFormat}
+import org.apache.parquet.hadoop.ParquetOutputFormat
+import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.spark.SparkException
import org.apache.spark.sql.{DataFrame, QueryTest, Row}
@@ -145,10 +146,10 @@ class ParquetTypeWideningSuite
   */
  private def assertAllParquetFilesDictionaryEncoded(dir: File): Unit = {
    dir.listFiles(_.getName.endsWith(".parquet")).foreach { file =>
-      val parquetMetadata = ParquetFileReader.readFooter(
-        spark.sessionState.newHadoopConf(),
-        new Path(dir.toString, file.getName),
-        ParquetMetadataConverter.NO_FILTER)
+      val inputFile = HadoopInputFile.fromPath(
+        new Path(dir.toString, file.getName), spark.sessionState.newHadoopConf())
+      val parquetMetadata =
+        ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      parquetMetadata.getBlocks.forEach { block =>
        block.getColumns.forEach { col =>
          assert(
@@ -166,10 +167,10 @@ class ParquetTypeWideningSuite
   */
  private def assertParquetV2Encoding(dir: File, expected_encoding: Encoding): Unit = {
    dir.listFiles(_.getName.endsWith(".parquet")).foreach { file =>
-      val parquetMetadata = ParquetFileReader.readFooter(
-        spark.sessionState.newHadoopConf(),
-        new Path(dir.toString, file.getName),
-        ParquetMetadataConverter.NO_FILTER)
+      val inputFile = HadoopInputFile.fromPath(
+        new Path(dir.toString, file.getName), spark.sessionState.newHadoopConf())
+      val parquetMetadata =
+        ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      parquetMetadata.getBlocks.forEach { block =>
        block.getColumns.forEach { col =>
          assert(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 8fd37234f08e..d4d190c3573f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -22,11 +22,12 @@ import java.time.{Duration, Period}
import java.time.temporal.ChronoUnit
import org.apache.hadoop.fs.Path
-import org.apache.parquet.hadoop.ParquetFileReader
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.execution.datasources.parquet.{ParquetCompressionCodec, ParquetTest}
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetCompressionCodec, ParquetFooterReader, ParquetTest}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
@@ -214,7 +215,8 @@ class HiveParquetSuite extends QueryTest
      val conf = spark.sessionState.newHadoopConf()
      val file = parquetFiles.head
-      val footer = ParquetFileReader.readFooter(conf, new Path(file.getAbsolutePath))
+      val inputFile = HadoopInputFile.fromPath(new Path(file.getAbsolutePath), conf)
+      val footer = ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      val codec = footer.getBlocks.get(0).getColumns.get(0).getCodec.name()
      assert(codec.equalsIgnoreCase(ParquetCompressionCodec.SNAPPY.lowerCaseName()),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]