Fix SPARK-1413: Parquet messes up stdout and stdin when used in Spark REPL Author: witgo <wi...@qq.com>
Closes #325 from witgo/SPARK-1413 and squashes the following commits: e57cd8e [witgo] use scala reflection to access and call the SLF4JBridgeHandler methods 45c8f40 [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 5e35d87 [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 0d5f819 [witgo] review commit 45e5b70 [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 fa69dcf [witgo] Merge branch 'master' into SPARK-1413 3c98dc4 [witgo] Merge branch 'master' into SPARK-1413 38160cb [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 ba09bcd [witgo] remove set the parquet log level a63d574 [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 5231ecd [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 3feb635 [witgo] parquet logger use parent handler fa00d5d [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 8bb6ffd [witgo] enableLogForwarding note fix edd9630 [witgo] move to f447f50 [witgo] merging master 5ad52bd [witgo] Merge branch 'master' of https://github.com/apache/spark into SPARK-1413 76670c1 [witgo] review commit 70f3c64 [witgo] Fix SPARK-1413 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a74fbbbc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a74fbbbc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a74fbbbc Branch: refs/heads/branch-1.0 Commit: a74fbbbca8f0d89b2e0e4e8751a93d33efc4fa9e Parents: e6d4a74 Author: witgo <wi...@qq.com> Authored: Thu Apr 10 10:35:24 2014 -0700 Committer: Patrick Wendell <pwend...@gmail.com> Committed: Thu Apr 10 10:36:20 2014 -0700 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/Logging.scala | 20 ++++++++++--- .../spark/sql/parquet/ParquetRelation.scala | 31 +++++--------------- 2 files changed, 23 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a74fbbbc/core/src/main/scala/org/apache/spark/Logging.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/Logging.scala b/core/src/main/scala/org/apache/spark/Logging.scala index e5e1561..9d429dc 100644 --- a/core/src/main/scala/org/apache/spark/Logging.scala +++ b/core/src/main/scala/org/apache/spark/Logging.scala @@ -28,7 +28,7 @@ import org.apache.spark.annotation.DeveloperApi * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows * logging messages at different levels using methods that only evaluate parameters lazily if the * log level is enabled. - * + * * NOTE: DO NOT USE this class outside of Spark. It is intended as an internal utility. * This will likely be changed or removed in future releases. */ @@ -60,7 +60,7 @@ trait Logging { protected def logDebug(msg: => String) { if (log.isDebugEnabled) log.debug(msg) } - + protected def logTrace(msg: => String) { if (log.isTraceEnabled) log.trace(msg) } @@ -117,10 +117,10 @@ trait Logging { val defaultLogProps = "org/apache/spark/log4j-defaults.properties" val classLoader = this.getClass.getClassLoader Option(classLoader.getResource(defaultLogProps)) match { - case Some(url) => + case Some(url) => PropertyConfigurator.configure(url) log.info(s"Using Spark's default log4j profile: $defaultLogProps") - case None => + case None => System.err.println(s"Spark was unable to load $defaultLogProps") } } @@ -135,4 +135,16 @@ trait Logging { private object Logging { @volatile private var initialized = false val initLock = new Object() + try { + // We use reflection here to handle the case where users remove the + // slf4j-to-jul bridge order to route their logs to JUL. + val bridgeClass = Class.forName("org.slf4j.bridge.SLF4JBridgeHandler") + bridgeClass.getMethod("removeHandlersForRootLogger").invoke(null) + val installed = bridgeClass.getMethod("isInstalled").invoke(null).asInstanceOf[Boolean] + if (!installed) { + bridgeClass.getMethod("install").invoke(null) + } + } catch { + case e: ClassNotFoundException => // can't log anything yet so just fail silently + } } http://git-wip-us.apache.org/repos/asf/spark/blob/a74fbbbc/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala index 505ad0a..4d7c86a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetRelation.scala @@ -82,30 +82,13 @@ private[sql] case class ParquetRelation(val path: String) private[sql] object ParquetRelation { def enableLogForwarding() { - // Note: Parquet does not use forwarding to parent loggers which - // is required for the JUL-SLF4J bridge to work. Also there is - // a default logger that appends to Console which needs to be - // reset. - import org.slf4j.bridge.SLF4JBridgeHandler - import java.util.logging.Logger - import java.util.logging.LogManager - - val loggerNames = Seq( - "parquet.hadoop.ColumnChunkPageWriteStore", - "parquet.hadoop.InternalParquetRecordWriter", - "parquet.hadoop.ParquetRecordReader", - "parquet.hadoop.ParquetInputFormat", - "parquet.hadoop.ParquetOutputFormat", - "parquet.hadoop.ParquetFileReader", - "parquet.hadoop.InternalParquetRecordReader", - "parquet.hadoop.codec.CodecConfig") - LogManager.getLogManager.reset() - SLF4JBridgeHandler.install() - for(name <- loggerNames) { - val logger = Logger.getLogger(name) - logger.setParent(Logger.getLogger(Logger.GLOBAL_LOGGER_NAME)) - logger.setUseParentHandlers(true) - } + // Note: Logger.getLogger("parquet") has a default logger + // that appends to Console which needs to be cleared. + val parquetLogger = java.util.logging.Logger.getLogger("parquet") + parquetLogger.getHandlers.foreach(parquetLogger.removeHandler) + // TODO(witgo): Need to set the log level ? + // if(parquetLogger.getLevel != null) parquetLogger.setLevel(null) + if (!parquetLogger.getUseParentHandlers) parquetLogger.setUseParentHandlers(true) } // The element type for the RDDs that this relation maps to.