This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cc560ea8585 [SPARK-39156][SQL] Clean up the usage of
`ParquetLogRedirector` in `ParquetFileFormat`
cc560ea8585 is described below
commit cc560ea8585d845af9a01ced1c536036f88e7ba7
Author: yangjie01 <[email protected]>
AuthorDate: Sun May 15 08:34:09 2022 -0700
[SPARK-39156][SQL] Clean up the usage of `ParquetLogRedirector` in
`ParquetFileFormat`
### What changes were proposed in this pull request?
SPARK-17993 introduced `ParquetLogRedirector` for Parquet versions < 1.9, and
[PARQUET-305](https://issues.apache.org/jira/browse/PARQUET-305) changed Parquet
1.9 to use slf4j instead of jul. Spark now uses Parquet 1.12.2 and [no
longer relies on Parquet version
1.6](https://github.com/apache/spark/pull/30005), so the
`ParquetLogRedirector` is no longer needed. This PR cleans up the usage of
`ParquetLogRedirector` in `ParquetFileFormat`.
### Why are the changes needed?
Clean up the usage of `ParquetLogRedirector` in `ParquetFileFormat`.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- Pass GA
- Manual test:
1. Build Spark client manually before and after this pr
2. Change parquet log4j level to debug:
```
logger.parquet1.name = org.apache.parquet
logger.parquet1.level = debug
logger.parquet2.name = parquet
logger.parquet2.level = debug
```
3. Try to read a Parquet file written with 1.6, for example
`sql/core/src/test/resources/test-data/dec-in-i32.parquet`.
```
java -jar parquet-tools-1.10.1.jar meta /${basedir}/dec-in-i32.parquet
file: file:/${basedir}/dec-in-i32.parquet
creator: parquet-mr version 1.6.0
extra: org.apache.spark.sql.parquet.row.metadata =
{"type":"struct","fields":[{"name":"i32_dec","type":"decimal(5,2)","nullable":true,"metadata":{}}]}
file schema: spark_schema
--------------------------------------------------------------------------------
i32_dec: OPTIONAL INT32 O:DECIMAL R:0 D:1
row group 1: RC:16 TS:102 OFFSET:4
--------------------------------------------------------------------------------
i32_dec: INT32 GZIP DO:0 FPO:4 SZ:131/102/0.78 VC:16
ENC:RLE,PLAIN_DICTIONARY,BIT_PACKED ST:[no stats for this column]
```
```
spark.read.parquet("file://${basedir}/ptable/dec-in-i32.parquet").show()
```
The log contents before and after this PR are consistent, and the error log
mentioned in SPARK-17993 does not appear.
Closes #36515 from LuciferYang/remove-parquetLogRedirector.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: huaxingao <[email protected]>
---
.../datasources/parquet/ParquetLogRedirector.java | 72 ----------------------
.../datasources/parquet/ParquetFileFormat.scala | 10 ---
2 files changed, 82 deletions(-)
diff --git
a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java
b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java
deleted file mode 100644
index 7a7f32ee1e8..00000000000
---
a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.datasources.parquet;
-
-import java.io.Serializable;
-import java.util.logging.Handler;
-import java.util.logging.Logger;
-
-import org.apache.parquet.Log;
-import org.slf4j.bridge.SLF4JBridgeHandler;
-
-// Redirects the JUL logging for parquet-mr versions <= 1.8 to SLF4J logging
using
-// SLF4JBridgeHandler. Parquet-mr versions >= 1.9 use SLF4J directly
-final class ParquetLogRedirector implements Serializable {
- // Client classes should hold a reference to INSTANCE to ensure redirection
occurs. This is
- // especially important for Serializable classes where fields are set but
constructors are
- // ignored
- static final ParquetLogRedirector INSTANCE = new ParquetLogRedirector();
-
- // JUL loggers must be held by a strong reference, otherwise they may get
destroyed by GC.
- // However, the root JUL logger used by Parquet isn't properly referenced.
Here we keep
- // references to loggers in both parquet-mr <= 1.6 and 1.7/1.8
- private static final Logger apacheParquetLogger =
- Logger.getLogger(Log.class.getPackage().getName());
- private static final Logger parquetLogger = Logger.getLogger("parquet");
-
- static {
- // For parquet-mr 1.7 and 1.8, which are under `org.apache.parquet`
namespace.
- try {
- Class.forName(Log.class.getName());
- redirect(Logger.getLogger(Log.class.getPackage().getName()));
- } catch (ClassNotFoundException ex) {
- throw new RuntimeException(ex);
- }
-
- // For parquet-mr 1.6.0 and lower versions bundled with Hive, which are
under `parquet`
- // namespace.
- try {
- Class.forName("parquet.Log");
- redirect(Logger.getLogger("parquet"));
- } catch (Throwable t) {
- // SPARK-9974: com.twitter:parquet-hadoop-bundle:1.6.0 is not packaged
into the assembly
- // when Spark is built with SBT. So `parquet.Log` may not be found.
This try/catch block
- // should be removed after this issue is fixed.
- }
- }
-
- private ParquetLogRedirector() {
- }
-
- private static void redirect(Logger logger) {
- for (Handler handler : logger.getHandlers()) {
- logger.removeHandler(handler);
- }
- logger.setUseParentHandlers(false);
- logger.addHandler(new SLF4JBridgeHandler());
- }
-}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index d9b24565ccc..9534be81928 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -58,11 +58,6 @@ class ParquetFileFormat
with DataSourceRegister
with Logging
with Serializable {
- // Hold a reference to the (serializable) singleton instance of
ParquetLogRedirector. This
- // ensures the ParquetLogRedirector class is initialized whether an instance
of ParquetFileFormat
- // is constructed or deserialized. Do not heed the Scala compiler's warning
about an unused field
- // here.
- private val parquetLogRedirector = ParquetLogRedirector.INSTANCE
override def shortName(): String = "parquet"
@@ -146,11 +141,6 @@ class ParquetFileFormat
}
new OutputWriterFactory {
- // This OutputWriterFactory instance is deserialized when writing
Parquet files on the
- // executor side without constructing or deserializing
ParquetFileFormat. Therefore, we hold
- // another reference to ParquetLogRedirector.INSTANCE here to ensure the
latter class is
- // initialized.
- private val parquetLogRedirector = ParquetLogRedirector.INSTANCE
override def newInstance(
path: String,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]