Repository: spark Updated Branches: refs/heads/master ae8ca4dfb -> f62c42728
[SPARK-2431][SQL] Refine StringComparison and related codes. Refine `StringComparison` and related code as follows: - Make `StringComparison` a trait mixed into `BinaryExpression`, similar to `StringRegexExpression` or `CaseConversionExpression`. - Make the nullability of `StringRegexExpression` depend on its children's nullability. - Add a case to `LikeSimplification` for LIKE patterns that contain no wildcard. Author: Takuya UESHIN <[email protected]> Closes #1357 from ueshin/issues/SPARK-2431 and squashes the following commits: 77766f5 [Takuya UESHIN] Add a case that the like condition includes no wildcard to LikeSimplification. b9da9d2 [Takuya UESHIN] Fix nullability of StringRegexExpression. 680bb72 [Takuya UESHIN] Refine StringComparison. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f62c4272 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f62c4272 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f62c4272 Branch: refs/heads/master Commit: f62c42728990266d5d5099abe241f699189ba025 Parents: ae8ca4d Author: Takuya UESHIN <[email protected]> Authored: Thu Jul 10 19:20:00 2014 -0700 Committer: Michael Armbrust <[email protected]> Committed: Thu Jul 10 19:20:00 2014 -0700 ---------------------------------------------------------------------- .../catalyst/expressions/stringOperations.scala | 28 +++++++++----------- .../sql/catalyst/optimizer/Optimizer.scala | 3 +++ 2 files changed, 16 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/f62c4272/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 
347471c..b385053 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.types.DataType import org.apache.spark.sql.catalyst.types.StringType import org.apache.spark.sql.catalyst.types.BooleanType - trait StringRegexExpression { self: BinaryExpression => @@ -32,7 +31,7 @@ trait StringRegexExpression { def escape(v: String): String def matches(regex: Pattern, str: String): Boolean - def nullable: Boolean = true + def nullable: Boolean = left.nullable || right.nullable def dataType: DataType = BooleanType // try cache the pattern for Literal @@ -157,19 +156,13 @@ case class Lower(child: Expression) extends UnaryExpression with CaseConversionE override def toString() = s"Lower($child)" } -/** A base class for functions that compare two strings, returning a boolean. */ -abstract class StringComparison extends Expression { - self: Product => +/** A base trait for functions that compare two strings, returning a boolean. */ +trait StringComparison { + self: BinaryExpression => type EvaluatedType = Any - def left: Expression - def right: Expression - - override def references = children.flatMap(_.references).toSet - override def children = left :: right :: Nil - - override def nullable: Boolean = true + def nullable: Boolean = left.nullable || right.nullable override def dataType: DataType = BooleanType def compare(l: String, r: String): Boolean @@ -184,26 +177,31 @@ abstract class StringComparison extends Expression { } } + def symbol: String = nodeName + override def toString() = s"$nodeName($left, $right)" } /** * A function that returns true if the string `left` contains the string `right`. 
*/ -case class Contains(left: Expression, right: Expression) extends StringComparison { +case class Contains(left: Expression, right: Expression) + extends BinaryExpression with StringComparison { override def compare(l: String, r: String) = l.contains(r) } /** * A function that returns true if the string `left` starts with the string `right`. */ -case class StartsWith(left: Expression, right: Expression) extends StringComparison { +case class StartsWith(left: Expression, right: Expression) + extends BinaryExpression with StringComparison { def compare(l: String, r: String) = l.startsWith(r) } /** * A function that returns true if the string `left` ends with the string `right`. */ -case class EndsWith(left: Expression, right: Expression) extends StringComparison { +case class EndsWith(left: Expression, right: Expression) + extends BinaryExpression with StringComparison { def compare(l: String, r: String) = l.endsWith(r) } http://git-wip-us.apache.org/repos/asf/spark/blob/f62c4272/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f0904f5..a142310 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -123,6 +123,7 @@ object LikeSimplification extends Rule[LogicalPlan] { val startsWith = "([^_%]+)%".r val endsWith = "%([^_%]+)".r val contains = "%([^_%]+)%".r + val equalTo = "([^_%]*)".r def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case Like(l, Literal(startsWith(pattern), StringType)) if !pattern.endsWith("\\") => @@ -131,6 +132,8 @@ object LikeSimplification extends Rule[LogicalPlan] { EndsWith(l, Literal(pattern)) 
case Like(l, Literal(contains(pattern), StringType)) if !pattern.endsWith("\\") => Contains(l, Literal(pattern)) + case Like(l, Literal(equalTo(pattern), StringType)) => + EqualTo(l, Literal(pattern)) } }
