Repository: spark
Updated Branches:
  refs/heads/master b1e5281c5 -> c9a676215


[SPARK-16199][SQL] Add a method to list the referenced columns in data source 
Filter

## What changes were proposed in this pull request?
It would be useful to support listing the columns that are referenced by a 
filter. This can help simplify data source planning, because with this we would 
be able to implement unhandledFilters method in HadoopFsRelation.

This is based on rxin's patch (#13901) and adds unit tests.

## How was this patch tested?
Added a new suite FiltersSuite.

Author: petermaxlee <petermax...@gmail.com>
Author: Reynold Xin <r...@databricks.com>

Closes #14120 from petermaxlee/SPARK-16199.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c9a67621
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c9a67621
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c9a67621

Branch: refs/heads/master
Commit: c9a6762150cfd62691a6361e05d2839b110fe8d0
Parents: b1e5281
Author: petermaxlee <petermax...@gmail.com>
Authored: Mon Jul 11 22:23:32 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Mon Jul 11 22:23:32 2016 -0700

----------------------------------------------------------------------
 project/MimaExcludes.scala                      |  7 +-
 .../org/apache/spark/sql/sources/filters.scala  | 71 ++++++++++++----
 .../apache/spark/sql/sources/FiltersSuite.scala | 89 ++++++++++++++++++++
 3 files changed, 151 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c9a67621/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 4bd6156..5606155 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -35,7 +35,12 @@ import com.typesafe.tools.mima.core.ProblemFilters._
 object MimaExcludes {
 
   // Exclude rules for 2.1.x
-  lazy val v21excludes = v20excludes
+  lazy val v21excludes = v20excludes ++ {
+    Seq(
+      // [SPARK-16199][SQL] Add a method to list the referenced columns in 
data source Filter
+      
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.Filter.references")
+    )
+  }
 
   // Exclude rules for 2.0.x
   lazy val v20excludes = {

http://git-wip-us.apache.org/repos/asf/spark/blob/c9a67621/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
index 9130e77..13c0766 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala
@@ -26,7 +26,18 @@ package org.apache.spark.sql.sources
  *
  * @since 1.3.0
  */
-abstract class Filter
+abstract class Filter {
+  /**
+   * List of columns that are referenced by this filter.
+   * @since 2.1.0
+   */
+  def references: Array[String]
+
+  protected def findReferences(value: Any): Array[String] = value match {
+    case f: Filter => f.references
+    case _ => Array.empty
+  }
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to a value
@@ -34,7 +45,9 @@ abstract class Filter
  *
  * @since 1.3.0
  */
-case class EqualTo(attribute: String, value: Any) extends Filter
+case class EqualTo(attribute: String, value: Any) extends Filter {
+  override def references: Array[String] = Array(attribute) ++ 
findReferences(value)
+}
 
 /**
  * Performs equality comparison, similar to [[EqualTo]]. However, this differs 
from [[EqualTo]]
@@ -43,7 +56,9 @@ case class EqualTo(attribute: String, value: Any) extends 
Filter
  *
  * @since 1.5.0
  */
-case class EqualNullSafe(attribute: String, value: Any) extends Filter
+case class EqualNullSafe(attribute: String, value: Any) extends Filter {
+  override def references: Array[String] = Array(attribute) ++ 
findReferences(value)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to a value
@@ -51,7 +66,9 @@ case class EqualNullSafe(attribute: String, value: Any) 
extends Filter
  *
  * @since 1.3.0
  */
-case class GreaterThan(attribute: String, value: Any) extends Filter
+case class GreaterThan(attribute: String, value: Any) extends Filter {
+  override def references: Array[String] = Array(attribute) ++ 
findReferences(value)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to a value
@@ -59,7 +76,9 @@ case class GreaterThan(attribute: String, value: Any) extends 
Filter
  *
  * @since 1.3.0
  */
-case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter
+case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter {
+  override def references: Array[String] = Array(attribute) ++ 
findReferences(value)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to a value
@@ -67,7 +86,9 @@ case class GreaterThanOrEqual(attribute: String, value: Any) 
extends Filter
  *
  * @since 1.3.0
  */
-case class LessThan(attribute: String, value: Any) extends Filter
+case class LessThan(attribute: String, value: Any) extends Filter {
+  override def references: Array[String] = Array(attribute) ++ 
findReferences(value)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to a value
@@ -75,7 +96,9 @@ case class LessThan(attribute: String, value: Any) extends 
Filter
  *
  * @since 1.3.0
  */
-case class LessThanOrEqual(attribute: String, value: Any) extends Filter
+case class LessThanOrEqual(attribute: String, value: Any) extends Filter {
+  override def references: Array[String] = Array(attribute) ++ 
findReferences(value)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to one of the 
values in the array.
@@ -99,6 +122,8 @@ case class In(attribute: String, values: Array[Any]) extends 
Filter {
   override def toString: String = {
     s"In($attribute, [${values.mkString(",")}]"
   }
+
+  override def references: Array[String] = Array(attribute) ++ 
values.flatMap(findReferences)
 }
 
 /**
@@ -106,35 +131,45 @@ case class In(attribute: String, values: Array[Any]) 
extends Filter {
  *
  * @since 1.3.0
  */
-case class IsNull(attribute: String) extends Filter
+case class IsNull(attribute: String) extends Filter {
+  override def references: Array[String] = Array(attribute)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to a non-null 
value.
  *
  * @since 1.3.0
  */
-case class IsNotNull(attribute: String) extends Filter
+case class IsNotNull(attribute: String) extends Filter {
+  override def references: Array[String] = Array(attribute)
+}
 
 /**
  * A filter that evaluates to `true` iff both `left` or `right` evaluate to 
`true`.
  *
  * @since 1.3.0
  */
-case class And(left: Filter, right: Filter) extends Filter
+case class And(left: Filter, right: Filter) extends Filter {
+  override def references: Array[String] = left.references ++ right.references
+}
 
 /**
  * A filter that evaluates to `true` iff at least one of `left` or `right` 
evaluates to `true`.
  *
  * @since 1.3.0
  */
-case class Or(left: Filter, right: Filter) extends Filter
+case class Or(left: Filter, right: Filter) extends Filter {
+  override def references: Array[String] = left.references ++ right.references
+}
 
 /**
  * A filter that evaluates to `true` iff `child` is evaluated to `false`.
  *
  * @since 1.3.0
  */
-case class Not(child: Filter) extends Filter
+case class Not(child: Filter) extends Filter {
+  override def references: Array[String] = child.references
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to
@@ -142,7 +177,9 @@ case class Not(child: Filter) extends Filter
  *
  * @since 1.3.1
  */
-case class StringStartsWith(attribute: String, value: String) extends Filter
+case class StringStartsWith(attribute: String, value: String) extends Filter {
+  override def references: Array[String] = Array(attribute)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to
@@ -150,7 +187,9 @@ case class StringStartsWith(attribute: String, value: 
String) extends Filter
  *
  * @since 1.3.1
  */
-case class StringEndsWith(attribute: String, value: String) extends Filter
+case class StringEndsWith(attribute: String, value: String) extends Filter {
+  override def references: Array[String] = Array(attribute)
+}
 
 /**
  * A filter that evaluates to `true` iff the attribute evaluates to
@@ -158,4 +197,6 @@ case class StringEndsWith(attribute: String, value: String) 
extends Filter
  *
  * @since 1.3.1
  */
-case class StringContains(attribute: String, value: String) extends Filter
+case class StringContains(attribute: String, value: String) extends Filter {
+  override def references: Array[String] = Array(attribute)
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/c9a67621/sql/core/src/test/scala/org/apache/spark/sql/sources/FiltersSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/sources/FiltersSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/FiltersSuite.scala
new file mode 100644
index 0000000..1cb7a21
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/FiltersSuite.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.sources
+
+import org.apache.spark.SparkFunSuite
+
+/**
+ * Unit test suites for data source filters.
+ */
+class FiltersSuite extends SparkFunSuite {
+
+  test("EqualTo references") {
+    assert(EqualTo("a", "1").references.toSeq == Seq("a"))
+    assert(EqualTo("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
+  }
+
+  test("EqualNullSafe references") {
+    assert(EqualNullSafe("a", "1").references.toSeq == Seq("a"))
+    assert(EqualNullSafe("a", EqualTo("b", "2")).references.toSeq == Seq("a", 
"b"))
+  }
+
+  test("GreaterThan references") {
+    assert(GreaterThan("a", "1").references.toSeq == Seq("a"))
+    assert(GreaterThan("a", EqualTo("b", "2")).references.toSeq == Seq("a", 
"b"))
+  }
+
+  test("GreaterThanOrEqual references") {
+    assert(GreaterThanOrEqual("a", "1").references.toSeq == Seq("a"))
+    assert(GreaterThanOrEqual("a", EqualTo("b", "2")).references.toSeq == 
Seq("a", "b"))
+  }
+
+  test("LessThan references") {
+    assert(LessThan("a", "1").references.toSeq == Seq("a"))
+    assert(LessThan("a", EqualTo("b", "2")).references.toSeq == Seq("a", "b"))
+  }
+
+  test("LessThanOrEqual references") {
+    assert(LessThanOrEqual("a", "1").references.toSeq == Seq("a"))
+    assert(LessThanOrEqual("a", EqualTo("b", "2")).references.toSeq == 
Seq("a", "b"))
+  }
+
+  test("In references") {
+    assert(In("a", Array("1")).references.toSeq == Seq("a"))
+    assert(In("a", Array("1", EqualTo("b", "2"))).references.toSeq == Seq("a", 
"b"))
+  }
+
+  test("IsNull references") {
+    assert(IsNull("a").references.toSeq == Seq("a"))
+  }
+
+  test("IsNotNull references") {
+    assert(IsNotNull("a").references.toSeq == Seq("a"))
+  }
+
+  test("And references") {
+    assert(And(EqualTo("a", "1"), EqualTo("b", "1")).references.toSeq == 
Seq("a", "b"))
+  }
+
+  test("Or references") {
+    assert(Or(EqualTo("a", "1"), EqualTo("b", "1")).references.toSeq == 
Seq("a", "b"))
+  }
+
+  test("StringStartsWith references") {
+    assert(StringStartsWith("a", "str").references.toSeq == Seq("a"))
+  }
+
+  test("StringEndsWith references") {
+    assert(StringEndsWith("a", "str").references.toSeq == Seq("a"))
+  }
+
+  test("StringContains references") {
+    assert(StringContains("a", "str").references.toSeq == Seq("a"))
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to