This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cc560ea8585 [SPARK-39156][SQL] Clean up the usage of 
`ParquetLogRedirector` in `ParquetFileFormat`
cc560ea8585 is described below

commit cc560ea8585d845af9a01ced1c536036f88e7ba7
Author: yangjie01 <[email protected]>
AuthorDate: Sun May 15 08:34:09 2022 -0700

    [SPARK-39156][SQL] Clean up the usage of `ParquetLogRedirector` in 
`ParquetFileFormat`
    
    ### What changes were proposed in this pull request?
    SPARK-17993 introduced `ParquetLogRedirector` for Parquet versions < 1.9. 
[PARQUET-305](https://issues.apache.org/jira/browse/PARQUET-305) changed Parquet 
1.9 to use slf4j instead of jul. Spark now uses Parquet 1.12.2 and [no 
longer relies on Parquet version 
1.6](https://github.com/apache/spark/pull/30005), so 
`ParquetLogRedirector` is no longer needed. This PR cleans up the usage of 
`ParquetLogRedirector` in `ParquetFileFormat`.
    
    ### Why are the changes needed?
    Clean up the usage of `ParquetLogRedirector` in `ParquetFileFormat`.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    
    - Pass GA
    - Manual test:
    
    1. Build Spark client manually before and after this pr
    2. Change parquet log4j level to debug:
    ```
    logger.parquet1.name = org.apache.parquet
    logger.parquet1.level = debug
    logger.parquet2.name = parquet
    logger.parquet2.level = debug
    ```
    3. Try to read a Parquet file written with Parquet 1.6, for example 
`sql/core/src/test/resources/test-data/dec-in-i32.parquet`.
    
    ```
    java -jar parquet-tools-1.10.1.jar meta /${basedir}/dec-in-i32.parquet
    file:        file:/${basedir}/dec-in-i32.parquet
    creator:     parquet-mr version 1.6.0
    extra:       org.apache.spark.sql.parquet.row.metadata = 
{"type":"struct","fields":[{"name":"i32_dec","type":"decimal(5,2)","nullable":true,"metadata":{}}]}
    
    file schema: spark_schema
    
--------------------------------------------------------------------------------
    i32_dec:     OPTIONAL INT32 O:DECIMAL R:0 D:1
    
    row group 1: RC:16 TS:102 OFFSET:4
    
--------------------------------------------------------------------------------
    i32_dec:      INT32 GZIP DO:0 FPO:4 SZ:131/102/0.78 VC:16 
ENC:RLE,PLAIN_DICTIONARY,BIT_PACKED ST:[no stats for this column]
    ```
    
    ```
    spark.read.parquet("file://${basedir}/ptable/dec-in-i32.parquet").show()
    ```
    
    The log contents before and after this PR are consistent, and the 
error log mentioned in SPARK-17993 does not appear.
    
    Closes #36515 from LuciferYang/remove-parquetLogRedirector.
    
    Authored-by: yangjie01 <[email protected]>
    Signed-off-by: huaxingao <[email protected]>
---
 .../datasources/parquet/ParquetLogRedirector.java  | 72 ----------------------
 .../datasources/parquet/ParquetFileFormat.scala    | 10 ---
 2 files changed, 82 deletions(-)

diff --git 
a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java
 
b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java
deleted file mode 100644
index 7a7f32ee1e8..00000000000
--- 
a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.execution.datasources.parquet;
-
-import java.io.Serializable;
-import java.util.logging.Handler;
-import java.util.logging.Logger;
-
-import org.apache.parquet.Log;
-import org.slf4j.bridge.SLF4JBridgeHandler;
-
-// Redirects the JUL logging for parquet-mr versions <= 1.8 to SLF4J logging 
using
-// SLF4JBridgeHandler. Parquet-mr versions >= 1.9 use SLF4J directly
-final class ParquetLogRedirector implements Serializable {
-  // Client classes should hold a reference to INSTANCE to ensure redirection 
occurs. This is
-  // especially important for Serializable classes where fields are set but 
constructors are
-  // ignored
-  static final ParquetLogRedirector INSTANCE = new ParquetLogRedirector();
-
-  // JUL loggers must be held by a strong reference, otherwise they may get 
destroyed by GC.
-  // However, the root JUL logger used by Parquet isn't properly referenced.  
Here we keep
-  // references to loggers in both parquet-mr <= 1.6 and 1.7/1.8
-  private static final Logger apacheParquetLogger =
-    Logger.getLogger(Log.class.getPackage().getName());
-  private static final Logger parquetLogger = Logger.getLogger("parquet");
-
-  static {
-    // For parquet-mr 1.7 and 1.8, which are under `org.apache.parquet` 
namespace.
-    try {
-      Class.forName(Log.class.getName());
-      redirect(Logger.getLogger(Log.class.getPackage().getName()));
-    } catch (ClassNotFoundException ex) {
-      throw new RuntimeException(ex);
-    }
-
-    // For parquet-mr 1.6.0 and lower versions bundled with Hive, which are 
under `parquet`
-    // namespace.
-    try {
-      Class.forName("parquet.Log");
-      redirect(Logger.getLogger("parquet"));
-    } catch (Throwable t) {
-      // SPARK-9974: com.twitter:parquet-hadoop-bundle:1.6.0 is not packaged 
into the assembly
-      // when Spark is built with SBT. So `parquet.Log` may not be found.  
This try/catch block
-      // should be removed after this issue is fixed.
-    }
-  }
-
-  private ParquetLogRedirector() {
-  }
-
-  private static void redirect(Logger logger) {
-    for (Handler handler : logger.getHandlers()) {
-      logger.removeHandler(handler);
-    }
-    logger.setUseParentHandlers(false);
-    logger.addHandler(new SLF4JBridgeHandler());
-  }
-}
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index d9b24565ccc..9534be81928 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -58,11 +58,6 @@ class ParquetFileFormat
   with DataSourceRegister
   with Logging
   with Serializable {
-  // Hold a reference to the (serializable) singleton instance of 
ParquetLogRedirector. This
-  // ensures the ParquetLogRedirector class is initialized whether an instance 
of ParquetFileFormat
-  // is constructed or deserialized. Do not heed the Scala compiler's warning 
about an unused field
-  // here.
-  private val parquetLogRedirector = ParquetLogRedirector.INSTANCE
 
   override def shortName(): String = "parquet"
 
@@ -146,11 +141,6 @@ class ParquetFileFormat
     }
 
     new OutputWriterFactory {
-      // This OutputWriterFactory instance is deserialized when writing 
Parquet files on the
-      // executor side without constructing or deserializing 
ParquetFileFormat. Therefore, we hold
-      // another reference to ParquetLogRedirector.INSTANCE here to ensure the 
latter class is
-      // initialized.
-      private val parquetLogRedirector = ParquetLogRedirector.INSTANCE
 
         override def newInstance(
           path: String,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to