[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/21193 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user rednaxelafx commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189785968 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -23,12 +23,20 @@ import org.apache.spark.sql.types.{BooleanType, IntegerType} class CodeBlockSuite extends SparkFunSuite { - test("Block can interpolate string and ExprValue inputs") { + test("Block interpolates string and ExprValue inputs") { val isNull = JavaCode.isNullVariable("expr1_isNull") -val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +val stringLiteral = "false" +val code = code"boolean $isNull = $stringLiteral;" assert(code.toString == "boolean expr1_isNull = false;") } + test("Literals are folded into string code parts instead of block inputs") { --- End diff -- Great, I like it! --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user rednaxelafx commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189786155 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -167,9 +170,40 @@ object Block { case other => throw new IllegalArgumentException( s"Can not interpolate ${other.getClass.getName} into code block.") } -CodeBlock(sc.parts, args) + +val (codeParts, blockInputs) = foldLiteralArgs(sc.parts, args) +CodeBlock(codeParts, blockInputs) + } +} + } + + // Folds eagerly the literal args into the code parts. + private def foldLiteralArgs(parts: Seq[String], args: Seq[Any]): (Seq[String], Seq[Any]) = { +val codeParts = ArrayBuffer.empty[String] +val blockInputs = ArrayBuffer.empty[Any] + +val strings = parts.iterator +val inputs = args.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) + +buf append strings.next +while (strings.hasNext) { --- End diff -- Looks good --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189767747 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -167,9 +170,40 @@ object Block { case other => throw new IllegalArgumentException( s"Can not interpolate ${other.getClass.getName} into code block.") } -CodeBlock(sc.parts, args) + +val (codeParts, blockInputs) = foldLiteralArgs(sc.parts, args) +CodeBlock(codeParts, blockInputs) + } +} + } + + // Folds eagerly the literal args into the code parts. + private def foldLiteralArgs(parts: Seq[String], args: Seq[Any]): (Seq[String], Seq[Any]) = { +val codeParts = ArrayBuffer.empty[String] +val blockInputs = ArrayBuffer.empty[Any] + +val strings = parts.iterator +val inputs = args.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) + +buf append strings.next --- End diff -- Ok. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189768074 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -167,9 +170,40 @@ object Block { case other => throw new IllegalArgumentException( s"Can not interpolate ${other.getClass.getName} into code block.") } -CodeBlock(sc.parts, args) + +val (codeParts, blockInputs) = foldLiteralArgs(sc.parts, args) +CodeBlock(codeParts, blockInputs) + } +} + } + + // Folds eagerly the literal args into the code parts. + private def foldLiteralArgs(parts: Seq[String], args: Seq[Any]): (Seq[String], Seq[Any]) = { +val codeParts = ArrayBuffer.empty[String] +val blockInputs = ArrayBuffer.empty[Any] --- End diff -- Ok. `JavaCode` is better. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189617272 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -167,9 +170,40 @@ object Block { case other => throw new IllegalArgumentException( s"Can not interpolate ${other.getClass.getName} into code block.") } -CodeBlock(sc.parts, args) + +val (codeParts, blockInputs) = foldLiteralArgs(sc.parts, args) +CodeBlock(codeParts, blockInputs) + } +} + } + + // Folds eagerly the literal args into the code parts. + private def foldLiteralArgs(parts: Seq[String], args: Seq[Any]): (Seq[String], Seq[Any]) = { +val codeParts = ArrayBuffer.empty[String] +val blockInputs = ArrayBuffer.empty[Any] + +val strings = parts.iterator +val inputs = args.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) + +buf append strings.next --- End diff -- can we use java style here? `buf.append(strings.next)`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189616918 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -167,9 +170,40 @@ object Block { case other => throw new IllegalArgumentException( s"Can not interpolate ${other.getClass.getName} into code block.") } -CodeBlock(sc.parts, args) + +val (codeParts, blockInputs) = foldLiteralArgs(sc.parts, args) +CodeBlock(codeParts, blockInputs) + } +} + } + + // Folds eagerly the literal args into the code parts. + private def foldLiteralArgs(parts: Seq[String], args: Seq[Any]): (Seq[String], Seq[Any]) = { +val codeParts = ArrayBuffer.empty[String] +val blockInputs = ArrayBuffer.empty[Any] --- End diff -- shall we make the type `JavaCode` instead of `Any`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189430164 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) --- End diff -- I think this is a good idea. We should not change literal args. Keeping them in block inputs seems meaningless. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189279129 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class CodeBlockSuite extends SparkFunSuite { + + test("Block can interpolate string and ExprValue inputs") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +assert(code.toString == "boolean expr1_isNull = false;") + } + + test("Block.stripMargin") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code1 = + code""" + |boolean $isNull = false; + |int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin +val expected = + s""" +|boolean expr1_isNull = false; +|int expr1 = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin.trim +assert(code1.toString == expected) + +val code2 = + code""" + >boolean $isNull = false; + >int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin('>') +assert(code2.toString == expected) + } + + test("Block can capture input expr values") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code = + code""" + |boolean $isNull = false; + |int $value = -1; + """.stripMargin +val exprValues = code.exprValues +assert(exprValues.size == 2) +assert(exprValues === Set(value, isNull)) + } + + test("concatenate blocks") { +val isNull1 = JavaCode.isNullVariable("expr1_isNull") +val value1 = JavaCode.variable("expr1", IntegerType) +val isNull2 = JavaCode.isNullVariable("expr2_isNull") +val value2 = JavaCode.variable("expr2", IntegerType) +val literal = JavaCode.literal("100", IntegerType) + +val code = + code""" + |boolean $isNull1 = false; + |int $value1 = -1;""".stripMargin + + code""" + |boolean $isNull2 = true; + |int $value2 = $literal;""".stripMargin + +val expected = + """ + 
|boolean expr1_isNull = false; + |int expr1 = -1; + |boolean expr2_isNull = true; + |int expr2 = 100;""".stripMargin.trim + +assert(code.toString == expected) + +val exprValues = code.exprValues +assert(exprValues.size == 5) +assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) + } + + test("Throws exception when interpolating unexcepted object in code block") { +val obj = TestClass(100) +val e = intercept[IllegalArgumentException] { + code"$obj" +} +assert(e.getMessage().contains(s"Can not interpolate ${obj.getClass.getName}")) + } + + test("replace expr values in code block") { +val statement = JavaCode.expression("1 + 1", IntegerType) +val isNull = JavaCode.isNullVariable("expr1_isNull") +val exprInFunc = JavaCode.variable("expr1", IntegerType) + +val code = + code""" + |callFunc(int $statement) { + | boolean $isNull = false; + | int $exprInFunc = $statement + 1; + |}""".stripMargin + +val aliasedParam = JavaCode.variable("aliased", statement.javaType) +val aliasedInputs = code.asInstanceOf[CodeBlock].blockInputs.map {
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189264892 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet + override def code: String = blocks.map(_.toString).mkString("\n") --- End diff -- Yeah, I think so. Ideally I think `JavaCode` should be immutable. Once we change code in it, we should have a new object instead of changing the current one. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189264507 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class CodeBlockSuite extends SparkFunSuite { + + test("Block can interpolate string and ExprValue inputs") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +assert(code.toString == "boolean expr1_isNull = false;") + } + + test("Block.stripMargin") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code1 = + code""" + |boolean $isNull = false; + |int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin +val expected = + s""" +|boolean expr1_isNull = false; +|int expr1 = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin.trim +assert(code1.toString == expected) + +val code2 = + code""" + >boolean $isNull = false; + >int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin('>') +assert(code2.toString == expected) + } + + test("Block can capture input expr values") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code = + code""" + |boolean $isNull = false; + |int $value = -1; + """.stripMargin +val exprValues = code.exprValues +assert(exprValues.size == 2) +assert(exprValues === Set(value, isNull)) + } + + test("concatenate blocks") { +val isNull1 = JavaCode.isNullVariable("expr1_isNull") +val value1 = JavaCode.variable("expr1", IntegerType) +val isNull2 = JavaCode.isNullVariable("expr2_isNull") +val value2 = JavaCode.variable("expr2", IntegerType) +val literal = JavaCode.literal("100", IntegerType) + +val code = + code""" + |boolean $isNull1 = false; + |int $value1 = -1;""".stripMargin + + code""" + |boolean $isNull2 = true; + |int $value2 = $literal;""".stripMargin + +val expected = + """ + 
|boolean expr1_isNull = false; + |int expr1 = -1; + |boolean expr2_isNull = true; + |int expr2 = 100;""".stripMargin.trim + +assert(code.toString == expected) + +val exprValues = code.exprValues +assert(exprValues.size == 5) +assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) + } + + test("Throws exception when interpolating unexcepted object in code block") { +val obj = TestClass(100) +val e = intercept[IllegalArgumentException] { + code"$obj" +} +assert(e.getMessage().contains(s"Can not interpolate ${obj.getClass.getName}")) + } + + test("replace expr values in code block") { +val statement = JavaCode.expression("1 + 1", IntegerType) +val isNull = JavaCode.isNullVariable("expr1_isNull") +val exprInFunc = JavaCode.variable("expr1", IntegerType) + +val code = + code""" + |callFunc(int $statement) { + | boolean $isNull = false; + | int $exprInFunc = $statement + 1; + |}""".stripMargin + +val aliasedParam = JavaCode.variable("aliased", statement.javaType) +val aliasedInputs = code.asInstanceOf[CodeBlock].blockInputs.map {
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189259397 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString --- End diff -- ah i see --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189259323 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet + override def code: String = blocks.map(_.toString).mkString("\n") --- End diff -- it depends on how we would implement code rewriting later. If we wanna keep `JavaCode` immutable, then `lazy val` is OK here, because once we rewrite the code, we will have a new `Blocks`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user rednaxelafx commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189229566 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class CodeBlockSuite extends SparkFunSuite { + + test("Block can interpolate string and ExprValue inputs") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +assert(code.toString == "boolean expr1_isNull = false;") + } + + test("Block.stripMargin") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code1 = + code""" + |boolean $isNull = false; + |int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin +val expected = + s""" +|boolean expr1_isNull = false; +|int expr1 = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin.trim +assert(code1.toString == expected) + +val code2 = + code""" + >boolean $isNull = false; + >int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin('>') +assert(code2.toString == expected) + } + + test("Block can capture input expr values") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code = + code""" + |boolean $isNull = false; + |int $value = -1; + """.stripMargin +val exprValues = code.exprValues +assert(exprValues.size == 2) +assert(exprValues === Set(value, isNull)) + } + + test("concatenate blocks") { +val isNull1 = JavaCode.isNullVariable("expr1_isNull") +val value1 = JavaCode.variable("expr1", IntegerType) +val isNull2 = JavaCode.isNullVariable("expr2_isNull") +val value2 = JavaCode.variable("expr2", IntegerType) +val literal = JavaCode.literal("100", IntegerType) + +val code = + code""" + |boolean $isNull1 = false; + |int $value1 = -1;""".stripMargin + + code""" + |boolean $isNull2 = true; + |int $value2 = $literal;""".stripMargin + +val expected = + """ + 
|boolean expr1_isNull = false; + |int expr1 = -1; + |boolean expr2_isNull = true; + |int expr2 = 100;""".stripMargin.trim + +assert(code.toString == expected) + +val exprValues = code.exprValues +assert(exprValues.size == 5) +assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) + } + + test("Throws exception when interpolating unexcepted object in code block") { +val obj = TestClass(100) +val e = intercept[IllegalArgumentException] { + code"$obj" +} +assert(e.getMessage().contains(s"Can not interpolate ${obj.getClass.getName}")) + } + + test("replace expr values in code block") { +val statement = JavaCode.expression("1 + 1", IntegerType) +val isNull = JavaCode.isNullVariable("expr1_isNull") +val exprInFunc = JavaCode.variable("expr1", IntegerType) + +val code = + code""" + |callFunc(int $statement) { + | boolean $isNull = false; + | int $exprInFunc = $statement + 1; + |}""".stripMargin + +val aliasedParam = JavaCode.variable("aliased", statement.javaType) +val aliasedInputs =
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user rednaxelafx commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189225184 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) --- End diff -- I'm wondering: does it make sense to eagerly fold the literal args into the `parts`, instead of carrying them over into `CodeBlock`? That's similar to what you're already doing with `CodeBlock.exprValues`, but just done a bit more eagerly. 
Basically, the only things that are safe to manipulate are the structured ones like `ExprValue` and `Block`; none of the others are safe to change because we don't know what's in there. So no need to even carry them as separate argument into `CodeBlock`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user rednaxelafx commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189229229 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class CodeBlockSuite extends SparkFunSuite { + + test("Block can interpolate string and ExprValue inputs") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +assert(code.toString == "boolean expr1_isNull = false;") + } + + test("Block.stripMargin") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code1 = + code""" + |boolean $isNull = false; + |int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin +val expected = + s""" +|boolean expr1_isNull = false; +|int expr1 = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin.trim +assert(code1.toString == expected) + +val code2 = + code""" + >boolean $isNull = false; + >int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin('>') +assert(code2.toString == expected) + } + + test("Block can capture input expr values") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code = + code""" + |boolean $isNull = false; + |int $value = -1; + """.stripMargin +val exprValues = code.exprValues +assert(exprValues.size == 2) +assert(exprValues === Set(value, isNull)) + } + + test("concatenate blocks") { +val isNull1 = JavaCode.isNullVariable("expr1_isNull") +val value1 = JavaCode.variable("expr1", IntegerType) +val isNull2 = JavaCode.isNullVariable("expr2_isNull") +val value2 = JavaCode.variable("expr2", IntegerType) +val literal = JavaCode.literal("100", IntegerType) + +val code = + code""" + |boolean $isNull1 = false; + |int $value1 = -1;""".stripMargin + + code""" + |boolean $isNull2 = true; + |int $value2 = $literal;""".stripMargin + +val expected = + """ + 
|boolean expr1_isNull = false; + |int expr1 = -1; + |boolean expr2_isNull = true; + |int expr2 = 100;""".stripMargin.trim + +assert(code.toString == expected) + +val exprValues = code.exprValues +assert(exprValues.size == 5) +assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) + } + + test("Throws exception when interpolating unexcepted object in code block") { +val obj = TestClass(100) +val e = intercept[IllegalArgumentException] { + code"$obj" +} +assert(e.getMessage().contains(s"Can not interpolate ${obj.getClass.getName}")) + } + + test("replace expr values in code block") { +val statement = JavaCode.expression("1 + 1", IntegerType) --- End diff -- Nit: can we rename this `statement` variable to something like `expr` or `someExpr` or something? It's an expression not a statement. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189213433 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet + override def code: String = blocks.map(_.toString).mkString("\n") --- End diff -- I think one of the goals of this is letting us change the string representation of an expression, so making it a lazy val may be fine for now, but would impede us from doing that. What do you think? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189212543 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString --- End diff -- it is not needed because we have `checkLengths` at line 159. This is the same approach which is used by the `StringContext` in the scala library. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189208663 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala --- @@ -259,8 +260,8 @@ trait CodegenSupport extends SparkPlan { * them to be evaluated twice. */ protected def evaluateVariables(variables: Seq[ExprCode]): String = { -val evaluate = variables.filter(_.code != "").map(_.code.trim).mkString("\n") -variables.foreach(_.code = "") +val evaluate = variables.filter(_.code.toString != "").map(_.code.toString).mkString("\n") --- End diff -- nit: `_.code.toString.nonEmpty` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189206358 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala --- @@ -88,7 +89,7 @@ case class InputFileBlockLength() extends LeafExpression with Nondeterministic { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val className = InputFileBlockHolder.getClass.getName.stripSuffix("$") -ev.copy(code = s"final ${CodeGenerator.javaType(dataType)} ${ev.value} = " + - s"$className.getLength();", isNull = FalseLiteral) +ev.copy(code = code"final ${CodeGenerator.javaType(dataType)} ${ev.value} = " + --- End diff -- ditto --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189208302 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet + override def code: String = blocks.map(_.toString).mkString("\n") --- End diff -- shall we also make it lazy val? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189208129 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala --- @@ -58,14 +59,14 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { } val valueVar = ctx.freshName("value") val str = s"columnVector[$columnVar, $ordinal, ${dataType.simpleString}]" -val code = s"${ctx.registerComment(str)}\n" + (if (nullable) { - s""" +val code = code"${ctx.registerComment(str)}\n" + (if (nullable) { --- End diff -- the `\n` is not needed now, since we always add `\n` between code blocks. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189206338 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala --- @@ -65,8 +66,8 @@ case class InputFileBlockStart() extends LeafExpression with Nondeterministic { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val className = InputFileBlockHolder.getClass.getName.stripSuffix("$") -ev.copy(code = s"final ${CodeGenerator.javaType(dataType)} ${ev.value} = " + - s"$className.getStartOffset();", isNull = FalseLiteral) +ev.copy(code = code"final ${CodeGenerator.javaType(dataType)} ${ev.value} = " + --- End diff -- ditto --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189202465 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala --- @@ -100,17 +101,18 @@ abstract class Expression extends TreeNode[Expression] { ctx.subExprEliminationExprs.get(this).map { subExprState => // This expression is repeated which means that the code to evaluate it has already been added // as a function before. In that case, we just re-use it. - ExprCode(ctx.registerComment(this.toString), subExprState.isNull, subExprState.value) + ExprCode(ctx.registerComment(this.toString), subExprState.isNull, +subExprState.value) --- End diff -- unnecessary change --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189204054 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString --- End diff -- shall we add an assert that `assert(!inputs.hasNext)`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189207159 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class CodeBlockSuite extends SparkFunSuite { + + test("Block can interpolate string and ExprValue inputs") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +assert(code.toString == "boolean expr1_isNull = false;") + } + + test("Block.stripMargin") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code1 = + code""" + |boolean $isNull = false; + |int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin +val expected = + s""" +|boolean expr1_isNull = false; +|int expr1 = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin.trim +assert(code1.toString == expected) + +val code2 = + code""" + >boolean $isNull = false; + >int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin('>') +assert(code2.toString == expected) + } + + test("Block can capture input expr values") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code = + code""" + |boolean $isNull = false; + |int $value = -1; + """.stripMargin +val exprValues = code.exprValues +assert(exprValues.size == 2) +assert(exprValues === Set(value, isNull)) + } + + test("concatenate blocks") { +val isNull1 = JavaCode.isNullVariable("expr1_isNull") +val value1 = JavaCode.variable("expr1", IntegerType) +val isNull2 = JavaCode.isNullVariable("expr2_isNull") +val value2 = JavaCode.variable("expr2", IntegerType) +val literal = JavaCode.literal("100", IntegerType) + +val code = + code""" + |boolean $isNull1 = false; + |int $value1 = -1;""".stripMargin + + code""" + |boolean $isNull2 = true; + |int $value2 = $literal;""".stripMargin + +val expected = + """ + 
|boolean expr1_isNull = false; + |int expr1 = -1; + |boolean expr2_isNull = true; + |int expr2 = 100;""".stripMargin.trim + +assert(code.toString == expected) + +val exprValues = code.exprValues +assert(exprValues.size == 5) +assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) + } + + test("Throws exception when interpolating unexcepted object in code block") { +val obj = TestClass(100) --- End diff -- we can simply use a Tuple2 here, like `1 -> 1` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189207915 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala --- @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class CodeBlockSuite extends SparkFunSuite { + + test("Block can interpolate string and ExprValue inputs") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val code = code"boolean ${isNull} = ${JavaCode.defaultLiteral(BooleanType)};" +assert(code.toString == "boolean expr1_isNull = false;") + } + + test("Block.stripMargin") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code1 = + code""" + |boolean $isNull = false; + |int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin +val expected = + s""" +|boolean expr1_isNull = false; +|int expr1 = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin.trim +assert(code1.toString == expected) + +val code2 = + code""" + >boolean $isNull = false; + >int $value = ${JavaCode.defaultLiteral(IntegerType)};""".stripMargin('>') +assert(code2.toString == expected) + } + + test("Block can capture input expr values") { +val isNull = JavaCode.isNullVariable("expr1_isNull") +val value = JavaCode.variable("expr1", IntegerType) +val code = + code""" + |boolean $isNull = false; + |int $value = -1; + """.stripMargin +val exprValues = code.exprValues +assert(exprValues.size == 2) +assert(exprValues === Set(value, isNull)) + } + + test("concatenate blocks") { +val isNull1 = JavaCode.isNullVariable("expr1_isNull") +val value1 = JavaCode.variable("expr1", IntegerType) +val isNull2 = JavaCode.isNullVariable("expr2_isNull") +val value2 = JavaCode.variable("expr2", IntegerType) +val literal = JavaCode.literal("100", IntegerType) + +val code = + code""" + |boolean $isNull1 = false; + |int $value1 = -1;""".stripMargin + + code""" + |boolean $isNull2 = true; + |int $value2 = $literal;""".stripMargin + +val expected = + """ + 
|boolean expr1_isNull = false; + |int expr1 = -1; + |boolean expr2_isNull = true; + |int expr2 = 100;""".stripMargin.trim + +assert(code.toString == expected) + +val exprValues = code.exprValues +assert(exprValues.size == 5) +assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) + } + + test("Throws exception when interpolating unexcepted object in code block") { +val obj = TestClass(100) +val e = intercept[IllegalArgumentException] { + code"$obj" +} +assert(e.getMessage().contains(s"Can not interpolate ${obj.getClass.getName}")) + } + + test("replace expr values in code block") { +val statement = JavaCode.expression("1 + 1", IntegerType) +val isNull = JavaCode.isNullVariable("expr1_isNull") +val exprInFunc = JavaCode.variable("expr1", IntegerType) + +val code = + code""" + |callFunc(int $statement) { + | boolean $isNull = false; + | int $exprInFunc = $statement + 1; + |}""".stripMargin + +val aliasedParam = JavaCode.variable("aliased", statement.javaType) +val aliasedInputs = code.asInstanceOf[CodeBlock].blockInputs.map
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r189206126 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala --- @@ -42,8 +43,8 @@ case class InputFileName() extends LeafExpression with Nondeterministic { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val className = InputFileBlockHolder.getClass.getName.stripSuffix("$") -ev.copy(code = s"final ${CodeGenerator.javaType(dataType)} ${ev.value} = " + - s"$className.getInputFilePath();", isNull = FalseLiteral) +ev.copy(code = code"final ${CodeGenerator.javaType(dataType)} ${ev.value} = " + + code"$className.getInputFilePath();", isNull = FalseLiteral) --- End diff -- the `Blocks.code` is defined as `override def code: String = blocks.map(_.toString).mkString("\n")`, is it OK here? maybe we can do ``` val typeDef = s"final ${CodeGenerator.javaType(dataType)}" ev.code(code = code"$typeDef ${ev.value} = $className.getInputFilePath();", ...) ``` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188959114 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { --- End diff -- I think `foldLeft` has better performance but I am not sure whether the difference can be significant here --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188956140 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet --- End diff -- oh, sorry, `blocks ` is a `Seq`, sorry, my bad. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188955270 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { --- End diff -- I think `flatMap` looks simpler. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188954468 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet --- End diff -- It's required. Otherwise a type mismatch compile error. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188950746 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Set(e) + case _ => Set.empty[ExprValue] +}.toSet + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override lazy val exprValues: Set[ExprValue] = blocks.flatMap(_.exprValues).toSet --- End diff -- `toSet` is redundant I think --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188950520 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,115 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Set[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + def length: Int = toString.length + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override lazy val exprValues: Set[ExprValue] = { +blockInputs.flatMap { --- End diff -- what about `foldLeft(Set.empty[ExprValue]) { ... }`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r18894 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- Yes, you're right. Sorry, just out of focus. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188907963 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- isn't it enough to compare `exprValues` with a `Set` containing the expected elements instead of comparing them one by one? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188904786 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- One issue I just found is, I can use the `ExprValues` order to do test easily. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188857554 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- yes, I think it is not and we are keeping here the order of the first occurrence (so for instance if we want to check which is the last reference we can't really know). I think a `Set` would be better since it enforces what we are keeping here, it avoids all the distinct (which we may forget to add somewhere getting a bad result...) --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r188830465 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- Not sure the order among those `ExprValue`s will be useful or not. Sounds a `Set` is good. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187982910 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- another +1 for 2 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187978104 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- +1 for the second one, too. (@hvanhovell 's comment sounds reasonable to me.) --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187612068 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- I'd make it a `Set` rather than a `Seq`. What do you think? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187058840 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala --- @@ -100,17 +101,18 @@ abstract class Expression extends TreeNode[Expression] { ctx.subExprEliminationExprs.get(this).map { subExprState => // This expression is repeated which means that the code to evaluate it has already been added // as a function before. In that case, we just re-use it. - ExprCode(ctx.registerComment(this.toString), subExprState.isNull, subExprState.value) + ExprCode(code"${ctx.registerComment(this.toString)}", subExprState.isNull, --- End diff -- Done. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187058702 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- @hvanhovell @cloud-fan Do you think we should continue to ban `String` in the code block string context (option 2) in a follow-up? This is a quite big PR for now. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187036570 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala --- @@ -56,19 +57,19 @@ import org.apache.spark.util.{ParentClassLoader, Utils} * @param value A term for a (possibly primitive) value of the result of the evaluation. Not * valid if `isNull` is set to `true`. */ -case class ExprCode(var code: String, var isNull: ExprValue, var value: ExprValue) +case class ExprCode(var code: Block, var isNull: ExprValue, var value: ExprValue) object ExprCode { def apply(isNull: ExprValue, value: ExprValue): ExprCode = { -ExprCode(code = "", isNull, value) +ExprCode(code = code"", isNull, value) --- End diff -- Ok. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187034014 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + --- End diff -- ok. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r187033550 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} +${castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)} --- End diff -- We can use `ExprValue` and `Block` around everywhere in `Cast` here (I did in a local branch), just it increases some code diff, so I'm not sure if we want it to be here or as a follow-up if it's easier for review. Inputs to `Cast` should be only `eval.value` and `eval.isNull` as it's the only child expression to `Cast`. We add generated codes here but they are not actually input expressions to `Cast`. So for now I manually let `eval.value` and `eval.isNull` be tracked. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186681857 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} +${castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)} --- End diff -- I might miss something, but don't we need to pass `ExprValue` or `Block` and return `Block` from `castCode()` to keep track of code snippets and inputs? We can say the same thing anywhere we create a smaller code snippet using `s` interpolation. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186677863 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala --- @@ -100,17 +101,18 @@ abstract class Expression extends TreeNode[Expression] { ctx.subExprEliminationExprs.get(this).map { subExprState => // This expression is repeated which means that the code to evaluate it has already been added // as a function before. In that case, we just re-use it. - ExprCode(ctx.registerComment(this.toString), subExprState.isNull, subExprState.value) + ExprCode(code"${ctx.registerComment(this.toString)}", subExprState.isNull, --- End diff -- How about returning `Block` from `ctx.registerComment()`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186683015 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala --- @@ -56,19 +57,19 @@ import org.apache.spark.util.{ParentClassLoader, Utils} * @param value A term for a (possibly primitive) value of the result of the evaluation. Not * valid if `isNull` is set to `true`. */ -case class ExprCode(var code: String, var isNull: ExprValue, var value: ExprValue) +case class ExprCode(var code: Block, var isNull: ExprValue, var value: ExprValue) object ExprCode { def apply(isNull: ExprValue, value: ExprValue): ExprCode = { -ExprCode(code = "", isNull, value) +ExprCode(code = code"", isNull, value) --- End diff -- `EmptyBlock`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186674665 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + --- End diff -- nit: how about moving `eval.code` into the following `code` interpolation? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186679356 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala --- @@ -119,7 +121,7 @@ abstract class Expression extends TreeNode[Expression] { private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = { // TODO: support whole stage codegen too -if (eval.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { +if (eval.code.toString.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { --- End diff -- Sure. Good idea. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186679287 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- They are the `ExprValue`s in the inputs. The inputs may contain things other than `ExprValue`s, e.g. other blocks, literals... --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186679018 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') --- End diff -- Like: ```scala code""" | val ... | ... """.stripMargin ``` It's basically for compatibility. We can remove this and disallow `stripMargin(customPrefix)`. WDYT? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186670840 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') --- End diff -- When do we need this? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186670675 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,6 +114,113 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] --- End diff -- These are the inputs right? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186669860 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala --- @@ -119,7 +121,7 @@ abstract class Expression extends TreeNode[Expression] { private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = { // TODO: support whole stage codegen too -if (eval.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { +if (eval.code.toString.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { --- End diff -- Can you add a method to `code` that produces the size directly? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186661027 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- I have already replaced the strings with `Block` in `Cast` locally (not committed yet). I'm fine with working with a manual block for now in this case. We can address this in follow-ups. Indeed, I'm afraid it's a bit tedious to identify the references and fill them in the correct order. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186644321 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- In this particular case I think we should not use the string interpolator. My preferred end game would be that the `CodeGenerator` functions will just return blocks (or something like that) instead of an opaque strings. That is definitely something we should do in a follow up, can we for now just manually create the block? That being said, if we are going to pick then I'd strong prefer option 2. I think option 1 is much harder to work with, and is also potentially buggy (what happens if you get the order wrong). --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186630931 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- Yeah. Ok. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186630643 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- let's hold it and see what others think about. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186627928 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- I prefer 2. Explicitly specifying the references sounds a bit verbose to me. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186627099 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- I feel it's a little fragile to depend on the `StringContext` to collect references. Two proposals: 1. ask the code builder to explicitly specify the references, like `JavaCode.block(code = xxx, ref1, ref2, ...)` 2. ban `String` in the code block string context, and create a special class to insert a `String` literal into a code block, so that we won't mistakenly pass code as a string and lose references. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186627026 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- Btw, I'm working to remove this manual reference. But it might increase code diff a lot. It's better to leave it to follow-up. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186626823 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- Oh, sorry for the misunderstanding. No, I don't see any others like `Cast`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186625672 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- I mean, is there other expressions like `Cast` that we need to manually add reference? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186624071 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- During changing to `Block` in `Cast`, I do think of this again. `eval` is the only expression for `Cast`, thus it should be the only `ExprValue` input to `Cast`'s code block we need to take care. Other `ExprValue` like the children of `eval.value` should be taken care in the block `eval.code`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186621034 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) --- End diff -- Note that we should not do `distinct` on `blockInputs` because the length of `codeParts` and `blockInputs` must be matched. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186618899 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. + */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { --- End diff -- Ok. 
--- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186618877 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString("\n") + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(blocks :+ c) +case b: Blocks => Blocks(blocks ++ b.blocks) +case EmptyBlock => this + } +} + +object EmptyBlock extends Block with Serializable { + override def code: String = "" + override def exprValues: Seq[ExprValue] = Seq.empty + + override def + (other: Block): Block = other +} + /** * A typed java fragment that must be a valid java expression. */ trait ExprValue extends JavaCode { def javaType: Class[_] def isPrimitive: Boolean = javaType.isPrimitive + + // This will be called during string interpolation. + override def toString: String = ExprValue.exprValueToString(this) --- End diff -- Forgot to revert this change. In previous commit, I need `ExprValue.exprValueToString` as the only entry for tracking `ExprValue`. This should be reverted now. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186618580 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) --- End diff -- It's possible. A distinct helps here? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186618412 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- I'd like to clean this part further. The generated codes in `Cast` are tangled now, IMHO. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186617171 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +val buf = new StringBuilder(Block.CODE_BLOCK_BUFFER_LENGTH) +buf append StringContext.treatEscapes(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append StringContext.treatEscapes(strings.next) +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString("\n") + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(blocks :+ c) +case b: Blocks => Blocks(blocks ++ b.blocks) +case EmptyBlock => this + } +} + +object EmptyBlock extends Block with Serializable { + override def code: String = "" + override def exprValues: Seq[ExprValue] = Seq.empty + + override def + (other: Block): Block = other +} + /** * A typed java fragment that must be a valid java expression. */ trait ExprValue extends JavaCode { def javaType: Class[_] def isPrimitive: Boolean = javaType.isPrimitive + + // This will be called during string interpolation. + override def toString: String = ExprValue.exprValueToString(this) --- End diff -- why is it needed? `JavaCode` already defines `override def toString: String = code` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186615731 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala --- @@ -623,8 +624,14 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) + +// Below the code comment including `eval.value` and `eval.isNull` is a trick. It makes the two +// expr values are referred by this code block. ev.copy(code = eval.code + - castCode(ctx, eval.value, eval.isNull, ev.value, ev.isNull, dataType, nullSafeCast)) + code""" +// Cast from ${eval.value}, ${eval.isNull} --- End diff -- how to guarantee this is the only one we need to take care of? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186617013 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) --- End diff -- is it possible that a `ExprValue` is referenced twice in the code string? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186616773 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -114,19 +114,128 @@ object JavaCode { } } +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // Returns java code string for this code block. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c).trim +case _ => code.trim + } + + // The leading prefix that should be stripped from each line. + // By default we strip blanks or control characters followed by '|' from the line. + var _marginChar: Option[Char] = Some('|') + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + // Concatenates this block with other block. + def + (other: Block): Block +} + +object Block { + + val CODE_BLOCK_BUFFER_LENGTH: Int = 512 + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { --- End diff -- `lazy val`? this might be expensive. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186361480 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") --- End diff -- +10 --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186359947 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValueSuite.scala --- @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.expressions.codegen +import scala.collection.mutable --- End diff -- my bad. forgot to remove. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186356233 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/ExprValueSuite.scala --- @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.expressions.codegen +import scala.collection.mutable --- End diff -- ??? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186263675 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala --- @@ -64,7 +64,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { val (preprocess, assigns, postprocess, arrayData) = GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false) ev.copy( - code = JavaCode.block(preprocess + assigns + postprocess), + code = code"$preprocess" + code"$assigns" + code"$postprocess", --- End diff -- oh, yes. Will update it in next commit. Thanks. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186263241 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala --- @@ -64,7 +64,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { val (preprocess, assigns, postprocess, arrayData) = GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false) ev.copy( - code = JavaCode.block(preprocess + assigns + postprocess), + code = code"$preprocess" + code"$assigns" + code"$postprocess", --- End diff -- nit: can this be `code"${preprocess}${assigns}${postprocess}"`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186254876 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append strings.next +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString --- End diff -- +1 for @viirya 's answer --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186251510 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { --- End diff -- I think it is a good idea. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186251439 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { --- End diff -- It is OK to prepare `stripMargin` API for backward compatibility. Can we do stripping operation regarding `|` inside `Block` as default. Then, we can eliminate `Margin` method in new code. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186251251 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) --- End diff -- Then how about ask for `StringBuilder` with larger initial capacity? Like: ```scala val buf = new StringBuilder(512) buf append string.next ``` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186251147 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) --- End diff -- Can we increase the initial size of this `StringBuffer`? This is because reallocating a buffer would occur frequently in the following loop if the initial size is small. It may lead to memory copy and GC. For example, ``` val buf = new StringBuilder(string.next) buf.ensureCapacity(512) ``` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186251103 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) --- End diff -- Ok. Looks like `StringContext` also uses `StringBuilder`. Will update in next commit. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186251067 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) --- End diff -- `StringBuilder` is better since it does not have synchronization. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250918 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append strings.next +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString --- End diff -- `Blocks` is conceptually concatenated code blocks which isn't necessarily to be a compile unit like one single java method. `Blocks` is for convenient usage like: ``` val block1 = code""" | val ... | ... """ val block2 = code""" | val ... | ... """ ExprCode(code = block1 + block2, ...) ``` If you use `Blocks` to wrap java code of a single huge java method, it definitely hits 64KB issue. But it is as doing this with java code string for now, so `Blocks` itself should not cause this problem, IMHO. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250803 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append strings.next +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString --- End diff -- Sorry for confusing you. I am curious whether concatenated string may generate more than 64KB Java bytecode. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250693 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append strings.next +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString --- End diff -- Is it that concatenated string is more than 64KB? I can do a simple test. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250678 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) --- End diff -- Oh. yes. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250676 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") --- End diff -- I'd like to limit the types of objects we can interpolate at first. So there will be fewer cases I'm not aware of. This can be opened to all others later. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250659 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") --- End diff -- I feel it's more like an illegal argument to the string interpolator for now. I'm open to others' ideas on this. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186250552 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { --- End diff -- Currently we have many usages like: ``` s""" | val isNull = false; | ... """.stripMargin ``` To enable this usage with `Block`, we need to give `Block` a `stripMargin` API. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186247026 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") --- End diff -- Runtime exception? `sys.error`? --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186247501 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { --- End diff -- We need to expose this function? If we could hide this stripping operation inside `Block`, I think it would be nice. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186247824 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) --- End diff -- nit: `val`. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r186248265 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") --- End diff -- Or, how about accepting other values as strings? e.g., `case other => other.toString` --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #21193: [SPARK-24121][SQL] Add API for handling expressio...
Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21193#discussion_r185995523 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/javaCode.scala --- @@ -112,6 +112,112 @@ object JavaCode { def isNullExpression(code: String): SimpleExprValue = { expression(code, BooleanType) } + + def block(code: String): Block = { +CodeBlock(codeParts = Seq(code), blockInputs = Seq.empty) + } +} + +/** + * A trait representing a block of java code. + */ +trait Block extends JavaCode { + + // The expressions to be evaluated inside this block. + def exprValues: Seq[ExprValue] + + // This will be called during string interpolation. + override def toString: String = _marginChar match { +case Some(c) => code.stripMargin(c) +case _ => code + } + + var _marginChar: Option[Char] = None + + def stripMargin(c: Char): this.type = { +_marginChar = Some(c) +this + } + + def stripMargin: this.type = { +_marginChar = Some('|') +this + } + + def + (other: Block): Block +} + +object Block { + implicit def blockToString(block: Block): String = block.toString + + implicit def blocksToBlock(blocks: Seq[Block]): Block = Blocks(blocks) + + implicit class BlockHelper(val sc: StringContext) extends AnyVal { +def code(args: Any*): Block = { + sc.checkLengths(args) + if (sc.parts.length == 0) { +EmptyBlock + } else { +args.foreach { + case _: ExprValue => + case _: Int | _: Long | _: Float | _: Double | _: String => + case _: Block => + case other => throw new IllegalArgumentException( +s"Can not interpolate ${other.getClass.getName} into code block.") +} +CodeBlock(sc.parts, args) + } +} + } +} + +/** + * A block of java code. Including a sequence of code parts and some inputs to this block. + * The actual java code is generated by embedding the inputs into the code parts. 
+ */ +case class CodeBlock(codeParts: Seq[String], blockInputs: Seq[Any]) extends Block { + override def exprValues: Seq[ExprValue] = { +blockInputs.flatMap { + case b: Block => b.exprValues + case e: ExprValue => Seq(e) + case _ => Seq.empty +} + } + + override def code: String = { +val strings = codeParts.iterator +val inputs = blockInputs.iterator +var buf = new StringBuffer(strings.next) +while (strings.hasNext) { + buf append inputs.next + buf append strings.next +} +buf.toString + } + + override def + (other: Block): Block = other match { +case c: CodeBlock => Blocks(Seq(this, c)) +case b: Blocks => Blocks(Seq(this) ++ b.blocks) +case EmptyBlock => this + } +} + +case class Blocks(blocks: Seq[Block]) extends Block { + override def exprValues: Seq[ExprValue] = blocks.flatMap(_.exprValues) + override def code: String = blocks.map(_.toString).mkString --- End diff -- I am just curious whether `mkString` may lead to 64KB limit issue. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org