yabola commented on code in PR #39950:
URL: https://github.com/apache/spark/pull/39950#discussion_r1167676059
##########
sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReader.java:
##########
@@ -17,23 +17,57 @@
package org.apache.spark.sql.execution.datasources.parquet;
+import java.io.IOException;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileSplit;
import org.apache.parquet.HadoopReadOptions;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;
-import java.io.IOException;
+import org.apache.spark.sql.execution.datasources.PartitionedFile;
/**
* `ParquetFooterReader` is a util class which encapsulates the helper
* methods of reading parquet file footer
*/
public class ParquetFooterReader {
+
+ public static final boolean SKIP_ROW_GROUPS = true;
+ public static final boolean WITH_ROW_GROUPS = false;
+
+ /**
+ * Reads footer for the input Parquet file 'split'. If 'skipRowGroup' is true,
+ * this will skip reading the Parquet row group metadata.
+ *
+ * @param partitionedFile a part (i.e. "block") of a single file that should be read
+ * @param configuration hadoop configuration of file
+ * @param skipRowGroup If true, skip reading row groups;
+ * if false, read row groups according to the file split range
+ */
+ public static ParquetMetadata readFooter(
+ Configuration configuration,
+ PartitionedFile partitionedFile,
+ boolean skipRowGroup) throws IOException {
+ FileSplit split = new FileSplit(partitionedFile.toPath(),
partitionedFile.start(),
Review Comment:
Good idea, done~
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]