Repository: spark
Updated Branches:
refs/heads/master ace41e8bf -> 3a9d66cf5
[SPARK-4061][SQL] We cannot use EOL character in the operand of LIKE predicate.
We cannot use EOL characters such as \n or \r in the operand of a LIKE predicate,
so the following condition is never true.
-- someStr is 'hoge\nfuga'
where someStr LIKE 'hoge_fuga'
Author: Kousuke Saruta <[email protected]>
Closes #2908 from sarutak/spark-sql-like-match-modification and squashes the
following commits:
d15798b [Kousuke Saruta] Remove test setting for thriftserver
f99a2f4 [Kousuke Saruta] Fixed LIKE predicate so that we can use an EOL character
in an operand
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a9d66cf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a9d66cf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a9d66cf
Branch: refs/heads/master
Commit: 3a9d66cf59ab7c9aee090e4c6067c73510e2ac26
Parents: ace41e8
Author: Kousuke Saruta <[email protected]>
Authored: Sun Oct 26 16:54:07 2014 -0700
Committer: Michael Armbrust <[email protected]>
Committed: Sun Oct 26 16:54:07 2014 -0700
----------------------------------------------------------------------
.../catalyst/expressions/stringOperations.scala | 42 +++++++++-----------
.../expressions/ExpressionEvaluationSuite.scala | 6 +++
2 files changed, 25 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3a9d66cf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index c2a3a5c..f634976 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression)
// replace the _ with .{1} exactly match 1 time of any character
// replace the % with .*, match 0 or more times with any character
- override def escape(v: String) = {
- val sb = new StringBuilder()
- var i = 0;
- while (i < v.length) {
- // Make a special case for "\\_" and "\\%"
- val n = v.charAt(i);
- if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' ||
v.charAt(i + 1) == '%')) {
- sb.append(v.charAt(i + 1))
- i += 1
- } else {
- if (n == '_') {
- sb.append(".");
- } else if (n == '%') {
- sb.append(".*");
- } else {
- sb.append(Pattern.quote(Character.toString(n)));
- }
- }
-
- i += 1
+ override def escape(v: String) =
+ if (!v.isEmpty) {
+ "(?s)" + (' ' +: v.init).zip(v).flatMap {
+ case (prev, '\\') => ""
+ case ('\\', c) =>
+ c match {
+ case '_' => "_"
+ case '%' => "%"
+ case _ => Pattern.quote("\\" + c)
+ }
+ case (prev, c) =>
+ c match {
+ case '_' => "."
+ case '%' => ".*"
+ case _ => Pattern.quote(Character.toString(c))
+ }
+ }.mkString
+ } else {
+ v
}
- sb.toString()
- }
-
override def matches(regex: Pattern, str: String): Boolean =
regex.matcher(str).matches()
}
http://git-wip-us.apache.org/repos/asf/spark/blob/3a9d66cf/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index f134d73..53c5348 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like "a%", true)
checkEvaluation("abc" like "b%", false)
checkEvaluation("abc" like "bc%", false)
+ checkEvaluation("a\nb" like "a_b", true)
+ checkEvaluation("ab" like "a%b", true)
+ checkEvaluation("a\nb" like "a%b", true)
}
test("LIKE Non-literal Regular Expression") {
@@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%")))
+ checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b")))
+ checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b")))
+ checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b")))
checkEvaluation(Literal(null, StringType) like regEx, null, new
GenericRow(Array[Any]("bc%")))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]