Repository: spark Updated Branches: refs/heads/master f99cb5615 -> c84acd4aa
[SPARK-9331][SQL] Add a code formatter to auto-format generated code. The generated expression code can be hard to read since it is not indented well. This patch adds a code formatter that formats code automatically when we output it to the screen. Author: Reynold Xin <[email protected]> Closes #7656 from rxin/codeformatter and squashes the following commits: 5ba0e90 [Reynold Xin] [SPARK-9331][SQL] Add a code formatter to auto-format generated code. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c84acd4a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c84acd4a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c84acd4a Branch: refs/heads/master Commit: c84acd4aa4f8bee98baa550cd6801c797a8a7a25 Parents: f99cb56 Author: Reynold Xin <[email protected]> Authored: Fri Jul 24 19:35:24 2015 -0700 Committer: Reynold Xin <[email protected]> Committed: Fri Jul 24 19:35:24 2015 -0700 ---------------------------------------------------------------------- .../expressions/codegen/CodeFormatter.scala | 60 ++++++++++++++++ .../expressions/codegen/CodeGenerator.scala | 2 +- .../codegen/GenerateMutableProjection.scala | 11 +-- .../expressions/codegen/GenerateOrdering.scala | 2 +- .../expressions/codegen/GeneratePredicate.scala | 2 +- .../codegen/GenerateProjection.scala | 3 +- .../codegen/GenerateUnsafeProjection.scala | 2 +- .../codegen/CodeFormatterSuite.scala | 76 ++++++++++++++++++++ 8 files changed, 148 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala new file mode 100644 index 0000000..2087cc7 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatter.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +/** + * An utility class that indents a block of code based on the curly braces. + * + * This is used to prettify generated code when in debug mode (or exceptions). + * + * Written by Matei Zaharia. 
+ */ +object CodeFormatter { + def format(code: String): String = new CodeFormatter().addLines(code).result() +} + +private class CodeFormatter { + private val code = new StringBuilder + private var indentLevel = 0 + private val indentSize = 2 + private var indentString = "" + + private def addLine(line: String): Unit = { + val indentChange = line.count(_ == '{') - line.count(_ == '}') + val newIndentLevel = math.max(0, indentLevel + indentChange) + // Lines starting with '}' should be de-indented even if they contain '{' after; + // in addition, lines ending with ':' are typically labels + val thisLineIndent = if (line.startsWith("}") || line.endsWith(":")) { + " " * (indentSize * (indentLevel - 1)) + } else { + indentString + } + code.append(thisLineIndent) + code.append(line) + code.append("\n") + indentLevel = newIndentLevel + indentString = " " * (indentSize * newIndentLevel) + } + + private def addLines(code: String): CodeFormatter = { + code.split('\n').foreach(s => addLine(s.trim())) + this + } + + private def result(): String = code.result() +} http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 4a90f1b..508882a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -299,7 +299,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin evaluator.cook(code) } catch { case e: Exception => - val msg = s"failed to compile:\n $code" + val msg = "failed to compile:\n " + 
CodeFormatter.format(code) logError(msg, e) throw new Exception(msg, e) } http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index d838268..825031a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.catalyst.expressions.codegen +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp -import scala.collection.mutable.ArrayBuffer - // MutableProjection is not accessible in Java abstract class BaseMutableProjection extends MutableProjection @@ -45,10 +45,11 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu val evaluationCode = e.gen(ctx) evaluationCode.code + s""" - if(${evaluationCode.isNull}) + if (${evaluationCode.isNull}) { mutableRow.setNullAt($i); - else + } else { ${ctx.setColumn("mutableRow", e.dataType, i, evaluationCode.primitive)}; + } """ } // collect projections into blocks as function has 64kb codesize limit in JVM @@ -119,7 +120,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu } """ - logDebug(s"code for ${expressions.mkString(",")}:\n$code") + logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = compile(code) () => { 
http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 2e6f9e2..dbd4616 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -107,7 +107,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } }""" - logDebug(s"Generated Ordering: $code") + logDebug(s"Generated Ordering: ${CodeFormatter.format(code)}") compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering] } http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 1dda599..dfd593f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -60,7 +60,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool } }""" - logDebug(s"Generated predicate '$predicate':\n$code") + logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}") val p = compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate] 
(r: InternalRow) => p.eval(r) http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala index f0efc4b..a361b21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala @@ -230,7 +230,8 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] { } """ - logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n${code}") + logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n" + + CodeFormatter.format(code)) compile(code).generate(ctx.references.toArray).asInstanceOf[Projection] } http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index d65e5c3..0320bcb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -114,7 +114,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro } """ - logDebug(s"code for ${expressions.mkString(",")}:\n$code") 
+ logDebug(s"code for ${expressions.mkString(",")}:\n${CodeFormatter.format(code)}") val c = compile(code) c.generate(ctx.references.toArray).asInstanceOf[UnsafeProjection] http://git-wip-us.apache.org/repos/asf/spark/blob/c84acd4a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala new file mode 100644 index 0000000..478702f --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeFormatterSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite + + +class CodeFormatterSuite extends SparkFunSuite { + + def testCase(name: String)(input: String)(expected: String): Unit = { + test(name) { + assert(CodeFormatter.format(input).trim === expected.trim) + } + } + + testCase("basic example") { + """ + |class A { + |blahblah; + |} + """.stripMargin + }{ + """ + |class A { + | blahblah; + |} + """.stripMargin + } + + testCase("nested example") { + """ + |class A { + | if (c) { + |duh; + |} + |} + """.stripMargin + } { + """ + |class A { + | if (c) { + | duh; + | } + |} + """.stripMargin + } + + testCase("single line") { + """ + |class A { + | if (c) {duh;} + |} + """.stripMargin + }{ + """ + |class A { + | if (c) {duh;} + |} + """.stripMargin + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
