[
https://issues.apache.org/jira/browse/FLINK-2828?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15022102#comment-15022102
]
ASF GitHub Bot commented on FLINK-2828:
---------------------------------------
Github user tillrohrmann commented on a diff in the pull request:
https://github.com/apache/flink/pull/1237#discussion_r45602715
--- Diff:
flink-staging/flink-table/src/main/scala/org/apache/flink/api/table/expressions/analysis/PredicatePushdown.scala
---
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.api.table.expressions.analysis
+
+import org.apache.flink.api.table.expressions._
+import org.apache.flink.api.table.expressions.analysis.FieldBacktracker
+ .resolveFieldNameAndTableSource
+import
org.apache.flink.api.table.expressions.analysis.PredicatePruner.pruneExpr
+import org.apache.flink.api.table.input.AdaptiveTableSource
+import org.apache.flink.api.table.plan._
+import org.apache.flink.api.table.trees.Rule
+
+/**
+ * Pushes constant predicates (e.g. a===12 && b.isNotNull) to each
corresponding
+ * AdaptiveTableSource that supports predicates.
+ */
+class PredicatePushdown(val inputOperation: PlanNode) extends
Rule[Expression] {
+
+ def apply(expr: Expression) = {
+ // get all table sources that predicates can be pushed into
+ val tableSources = getPushableTableSources(inputOperation)
+
+ // prune expression tree such that it only contains constant predicates
+ // such as a=1,a="Hello World", isNull(a) but not a=b
+ val constantExpr = pruneExpr(isResolvedAndConstant, expr)
+
+ // push predicates to each table source respectively
+ for (ts <- tableSources) {
+ // prune expression tree such that it only contains field references
of ts
+ val tsExpr = pruneExpr((e) => isSameTableSource(e, ts), constantExpr)
+
+ // resolve field names to field names of the table source
+ val result = tsExpr.transformPost {
+ case rfr@ResolvedFieldReference(fieldName, typeInfo) =>
+ ResolvedFieldReference(
+ resolveFieldNameAndTableSource(inputOperation,
fieldName).get._2,
+ typeInfo
+ )
+ }
+ // push down predicates
+ if (result != NopExpression()) {
+ ts.notifyPredicates(result)
+ }
+ }
+ expr
+ }
+
+ //
----------------------------------------------------------------------------------------------
+
+ /**
+ * @return all AdaptiveTableSources the given PlanNode contains
+ */
+ def getPushableTableSources(tree: PlanNode): Seq[AdaptiveTableSource] =
tree match {
+ case Root(ts: AdaptiveTableSource, _) => Seq(ts)
+ case pn: PlanNode =>
+ pn.children flatMap { child => getPushableTableSources(child ) }
+ case _ => Seq() // add nothing
+ }
+
+ /**
+ *
+ * @return true if the given expression is a predicate that consists of a
+ * ResolvedFieldReference and a constant/literal
+ * e.g. a=1, 2<c
+ */
+ def isResolvedAndConstant(expr: Expression) : Boolean = {
+ expr match {
+ case bc: BinaryComparison if bc.left.isInstanceOf[Literal]
+ && bc.right.isInstanceOf[ResolvedFieldReference] =>
+ true
+ case bc: BinaryComparison if bc.right.isInstanceOf[Literal]
+ && bc.left.isInstanceOf[ResolvedFieldReference] =>
+ true
+ case ue@(IsNotNull(_) | IsNull(_)) =>
+ val child = ue.asInstanceOf[UnaryExpression].child
+ child.isInstanceOf[ResolvedFieldReference]
+ case And(_,_) | Or(_,_) | Not(_) =>
+ true
+ case _ => false
+ }
+ }
+
+ /**
+ * @return true if the given expression only consists of
ResolvedFieldReference of
+ * the same given AdaptiveTableSource
+ */
+ def isSameTableSource(expr: Expression, ts: AdaptiveTableSource) :
Boolean = {
+ expr match {
+ case bc: BinaryComparison if
bc.right.isInstanceOf[ResolvedFieldReference] =>
+ val fieldRef = bc.right.asInstanceOf[ResolvedFieldReference]
+ val resolvedField = resolveFieldNameAndTableSource(inputOperation,
fieldRef.name)
+ resolvedField.isDefined && resolvedField.get._1 == ts
--- End diff --
Pattern matching would be more Scala-esque here than the isInstanceOf/asInstanceOf pair.
> Add interfaces for Table API input formats
> ------------------------------------------
>
> Key: FLINK-2828
> URL: https://issues.apache.org/jira/browse/FLINK-2828
> Project: Flink
> Issue Type: New Feature
> Components: Table API
> Reporter: Timo Walther
> Assignee: Timo Walther
>
> In order to support input formats for the Table API, interfaces are
> necessary. I propose two types of TableSources:
> - AdaptiveTableSources can adapt their output to the requirements of the
> plan. Although the output schema stays the same, the TableSource can react to
> field resolution and/or predicates internally and can return adapted
> DataSet/DataStream versions in the "translate" step.
> - StaticTableSources are an easy way to provide the Table API with additional
> input formats without much implementation effort (e.g. for fromCsvFile())
> TableSources need to be deeply integrated into the Table API.
> The TableEnvironment requires a newly introduced AbstractExecutionEnvironment
> (common super class of all ExecutionEnvironments for DataSets and
> DataStreams).
> Here's what a TableSource can see from more complicated queries:
> {code}
> getTableJava(tableSource1)
> .filter("a===5 || a===6")
> .select("a as a4, b as b4, c as c4")
> .filter("b4===7")
> .join(getTableJava(tableSource2))
> .where("a===a4 && c==='Test' && c4==='Test2'")
> // Result predicates for tableSource1:
> // List("a===5 || a===6", "b===7", "c==='Test2'")
> // Result predicates for tableSource2:
> // List("c==='Test'")
> // Result resolved fields for tableSource1 (true = filtering,
> false=selection):
> // Set(("a", true), ("a", false), ("b", true), ("b", false), ("c", false),
> ("c", true))
> // Result resolved fields for tableSource2 (true = filtering,
> false=selection):
> // Set(("a", true), ("c", true))
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)