This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fdabe08294e6 [SPARK-48490][CORE][FOLLOWUP] Properly process escape
sequences
fdabe08294e6 is described below
commit fdabe08294e60425cd34c5d3e7b3efd522a1e0c9
Author: Gengliang Wang <[email protected]>
AuthorDate: Fri Jun 21 17:17:17 2024 +0800
[SPARK-48490][CORE][FOLLOWUP] Properly process escape sequences
### What changes were proposed in this pull request?
Even with the fix in https://github.com/apache/spark/pull/46824, escape
sequences (`\r`, `\n`, `\t`, etc.) are not handled properly. For example, when
we use `log"\n"`, the StringContext interprets `\n` as a literal backslash `\`
followed by `n` instead of a newline character. As a result, the bytes of
`log"\n".message` become `[92, 110]` instead of `[10]`.
This PR fixes the issue by applying `StringContext.processEscapes` to each
string part in `LogStringContext`, and removes the earlier
`StringEscapeUtils.unescapeJava` call from `LogEntry.message`.
### Why are the changes needed?
To ensure that escape sequences are properly processed in Spark logs.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
New unit test (`process escape sequences` in `StructuredLoggingSuite`).
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47050 from gengliangwang/fixEscape.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../utils/src/main/scala/org/apache/spark/internal/Logging.scala | 7 +++----
.../scala/org/apache/spark/util/StructuredLoggingSuite.scala | 9 +++++++++
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git
a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
index 9500ed4b174e..1d43bda6e4fc 100644
--- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
+++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala
@@ -19,7 +19,6 @@ package org.apache.spark.internal
import scala.jdk.CollectionConverters._
-import org.apache.commons.text.StringEscapeUtils
import org.apache.logging.log4j.{CloseableThreadContext, Level, LogManager}
import org.apache.logging.log4j.core.{Filter, LifeCycle, LogEvent, Logger =>
Log4jLogger, LoggerContext}
import org.apache.logging.log4j.core.appender.ConsoleAppender
@@ -100,7 +99,7 @@ case class MessageWithContext(message: String, context:
java.util.HashMap[String
* Companion class for lazy evaluation of the MessageWithContext instance.
*/
class LogEntry(messageWithContext: => MessageWithContext) {
- def message: String =
StringEscapeUtils.unescapeJava(messageWithContext.message)
+ def message: String = messageWithContext.message
def context: java.util.HashMap[String, String] = messageWithContext.context
}
@@ -144,7 +143,7 @@ trait Logging {
implicit class LogStringContext(val sc: StringContext) {
def log(args: MDC*): MessageWithContext = {
val processedParts = sc.parts.iterator
- val sb = new StringBuilder(processedParts.next())
+ val sb = new
StringBuilder(StringContext.processEscapes(processedParts.next()))
val context = new java.util.HashMap[String, String]()
args.foreach { mdc =>
@@ -155,7 +154,7 @@ trait Logging {
}
if (processedParts.hasNext) {
- sb.append(processedParts.next())
+ sb.append(StringContext.processEscapes(processedParts.next()))
}
}
diff --git
a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
index 598ae90402a8..10c240991bf3 100644
---
a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
+++
b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala
@@ -360,6 +360,15 @@ class StructuredLoggingSuite extends LoggingSuiteBase {
}""")
assert(pattern1.r.matches(logOutput) || pattern2.r.matches(logOutput))
}
+
+ test("process escape sequences") {
+ assert(log"\n".message == "\n")
+ assert(log"\t".message == "\t")
+ assert(log"\b".message == "\b")
+ assert(log"\r".message == "\r")
+ assert((log"\r" + log"\n" + log"\t" + log"\b").message == "\r\n\t\b")
+ assert((log"\r${MDC(LogKeys.EXECUTOR_ID, 1)}\n".message == "\r1\n"))
+ }
}
object CustomLogKeys {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]