This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3fa07bb841fd [SPARK-55305][SQL][TESTS] Use `ParquetFooterReader.readFooter` uniformly in test code to read the footer
3fa07bb841fd is described below
commit 3fa07bb841fd9bb6e9a4e6c0f17ae2dfa86d65ca
Author: yangjie01 <[email protected]>
AuthorDate: Mon Feb 2 16:01:25 2026 +0800
[SPARK-55305][SQL][TESTS] Use `ParquetFooterReader.readFooter` uniformly in test code to read the footer
### What changes were proposed in this pull request?
This PR changes the Spark test code to uniformly use `ParquetFooterReader.readFooter` when reading Parquet footers.
### Why are the changes needed?
This uses the helper method encapsulated by Spark and avoids the deprecated APIs in `ParquetFileReader`; specifically, `ParquetFileReader#readFooter(Configuration, Path)` has been deprecated. A sketch of the replacement pattern is shown below.
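
For illustration, a minimal sketch of the replacement pattern the tests now follow, assuming a `SparkSession` named `spark` and a Parquet file path in a `path` string (both come from the surrounding test, not from this change):

```scala
import scala.jdk.CollectionConverters._

import org.apache.hadoop.fs.Path
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile

import org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader

// Before (deprecated): ParquetFileReader.readFooter(conf, new Path(path))
// After: wrap the path in a HadoopInputFile and read the footer through
// Spark's ParquetFooterReader helper.
val conf = spark.sessionState.newHadoopConf()
val inputFile = HadoopInputFile.fromPath(new Path(path), conf)
val footer = ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)

// The returned ParquetMetadata is used the same way as before,
// e.g. to inspect per-row-group row counts.
val rowCounts = footer.getBlocks.asScala.map(_.getRowCount)
```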
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Pass GitHub Actions
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #54086 from LuciferYang/use-ParquetFooterReader-readFooter-in-tests.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../datasources/parquet/ParquetRowIndexSuite.scala | 7 +++++--
.../parquet/ParquetTypeWideningSuite.scala | 19 ++++++++++---------
.../org/apache/spark/sql/hive/HiveParquetSuite.scala | 8 +++++---
3 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala
index 08fd8a9ecb53..fd96e23b17c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowIndexSuite.scala
@@ -22,8 +22,10 @@ import scala.jdk.CollectionConverters._
import org.apache.hadoop.fs.Path
import org.apache.parquet.column.ParquetProperties._
-import org.apache.parquet.hadoop.{ParquetFileReader, ParquetOutputFormat}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE
+import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.spark.SparkException
import org.apache.spark.sql.QueryTest
@@ -43,7 +45,8 @@ class ParquetRowIndexSuite extends QueryTest with SharedSparkSession {
  import testImplicits._

  private def readRowGroupRowCounts(path: String): Seq[Long] = {
-    ParquetFileReader.readFooter(spark.sessionState.newHadoopConf(), new Path(path))
+    val inputFile = HadoopInputFile.fromPath(new Path(path), spark.sessionState.newHadoopConf())
+    ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      .getBlocks.asScala.toSeq.map(_.getRowCount)
  }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala
index 09ed6955a516..2fc42f19743f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala
@@ -21,7 +21,8 @@ import java.io.File
import org.apache.hadoop.fs.Path
import org.apache.parquet.column.{Encoding, ParquetProperties}
import org.apache.parquet.format.converter.ParquetMetadataConverter
-import org.apache.parquet.hadoop.{ParquetFileReader, ParquetOutputFormat}
+import org.apache.parquet.hadoop.ParquetOutputFormat
+import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.spark.SparkException
import org.apache.spark.sql.{DataFrame, QueryTest, Row}
@@ -145,10 +146,10 @@ class ParquetTypeWideningSuite
   */
  private def assertAllParquetFilesDictionaryEncoded(dir: File): Unit = {
    dir.listFiles(_.getName.endsWith(".parquet")).foreach { file =>
-      val parquetMetadata = ParquetFileReader.readFooter(
-        spark.sessionState.newHadoopConf(),
-        new Path(dir.toString, file.getName),
-        ParquetMetadataConverter.NO_FILTER)
+      val inputFile = HadoopInputFile.fromPath(
+        new Path(dir.toString, file.getName), spark.sessionState.newHadoopConf())
+      val parquetMetadata =
+        ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      parquetMetadata.getBlocks.forEach { block =>
        block.getColumns.forEach { col =>
          assert(
@@ -166,10 +167,10 @@ class ParquetTypeWideningSuite
   */
  private def assertParquetV2Encoding(dir: File, expected_encoding: Encoding): Unit = {
    dir.listFiles(_.getName.endsWith(".parquet")).foreach { file =>
-      val parquetMetadata = ParquetFileReader.readFooter(
-        spark.sessionState.newHadoopConf(),
-        new Path(dir.toString, file.getName),
-        ParquetMetadataConverter.NO_FILTER)
+      val inputFile = HadoopInputFile.fromPath(
+        new Path(dir.toString, file.getName), spark.sessionState.newHadoopConf())
+      val parquetMetadata =
+        ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      parquetMetadata.getBlocks.forEach { block =>
        block.getColumns.forEach { col =>
          assert(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
index 8fd37234f08e..d4d190c3573f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala
@@ -22,11 +22,12 @@ import java.time.{Duration, Period}
import java.time.temporal.ChronoUnit
import org.apache.hadoop.fs.Path
-import org.apache.parquet.hadoop.ParquetFileReader
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.execution.datasources.parquet.{ParquetCompressionCodec, ParquetTest}
+import org.apache.spark.sql.execution.datasources.parquet.{ParquetCompressionCodec, ParquetFooterReader, ParquetTest}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
@@ -214,7 +215,8 @@ class HiveParquetSuite extends QueryTest
      val conf = spark.sessionState.newHadoopConf()
      val file = parquetFiles.head
-      val footer = ParquetFileReader.readFooter(conf, new Path(file.getAbsolutePath))
+      val inputFile = HadoopInputFile.fromPath(new Path(file.getAbsolutePath), conf)
+      val footer = ParquetFooterReader.readFooter(inputFile, ParquetMetadataConverter.NO_FILTER)
      val codec = footer.getBlocks.get(0).getColumns.get(0).getCodec.name()
      assert(codec.equalsIgnoreCase(ParquetCompressionCodec.SNAPPY.lowerCaseName()),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]