Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/22429#discussion_r223983537
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala ---
@@ -167,6 +172,58 @@ package object util {
builder.toString()
}
+ /**
+ * The performance overhead of creating and logging strings for wide schemas can be large. To
+ * limit the impact, we bound the number of fields to include by default. This can be overridden
+ * by setting the 'spark.debug.maxToStringFields' conf in SparkEnv or by setting the SQL config
+ * `spark.sql.debug.maxToStringFields`.
+ */
+ private[spark] def maxNumToStringFields: Int = {
+ val legacyLimit = if (SparkEnv.get != null) {
+ SparkEnv.get.conf.get(config.MAX_TO_STRING_FIELDS)
+ } else {
+ config.MAX_TO_STRING_FIELDS.defaultValue.get
+ }
+ val sqlConfLimit = SQLConf.get.maxToStringFields
+
+ Math.max(sqlConfLimit, legacyLimit)
+ }
+
+ /** Whether we have warned about plan string truncation yet. */
+ private val truncationWarningPrinted = new AtomicBoolean(false)
+
+ /**
+ * Format a sequence with semantics similar to calling .mkString(). Any elements beyond
+ * maxNumToStringFields will be dropped and replaced by a "... N more fields" placeholder.
+ *
+ * @return the trimmed and formatted string.
+ */
+ def truncatedString[T](
+ seq: Seq[T],
+ start: String,
+ sep: String,
+ end: String,
+ maxFields: Option[Int]): String = {
+ val maxNumFields = maxFields.getOrElse(maxNumToStringFields)
--- End diff --
You should document this behavior.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]