[ 
https://issues.apache.org/jira/browse/FLINK-2828?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15022102#comment-15022102
 ] 

ASF GitHub Bot commented on FLINK-2828:
---------------------------------------

Github user tillrohrmann commented on a diff in the pull request:

    https://github.com/apache/flink/pull/1237#discussion_r45602715
  
    --- Diff: 
flink-staging/flink-table/src/main/scala/org/apache/flink/api/table/expressions/analysis/PredicatePushdown.scala
 ---
    @@ -0,0 +1,120 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.api.table.expressions.analysis
    +
    +import org.apache.flink.api.table.expressions._
    +import org.apache.flink.api.table.expressions.analysis.FieldBacktracker
    +  .resolveFieldNameAndTableSource
    +import 
org.apache.flink.api.table.expressions.analysis.PredicatePruner.pruneExpr
    +import org.apache.flink.api.table.input.AdaptiveTableSource
    +import org.apache.flink.api.table.plan._
    +import org.apache.flink.api.table.trees.Rule
    +
    +/**
    + * Pushes constant predicates (e.g. a===12 && b.isNotNull) to each
    + * corresponding AdaptiveTableSource that supports predicates.
    + */
    +class PredicatePushdown(val inputOperation: PlanNode) extends 
Rule[Expression] {
    +
    +  def apply(expr: Expression) = {
    +    // get all table sources where predicates can be pushed into
    +    val tableSources = getPushableTableSources(inputOperation)
    +
    +    // prune expression tree such that it only contains constant predicates
    +    // such as a=1, a="Hello World", isNull(a) but not a=b
    +    val constantExpr = pruneExpr(isResolvedAndConstant, expr)
    +
    +    // push predicates to each table source respectively
    +    for (ts <- tableSources) {
    +      // prune expression tree such that it only contains field references 
of ts
    +      val tsExpr = pruneExpr((e) => isSameTableSource(e, ts), constantExpr)
    +
    +      // resolve field names to field names of the table source
    +      val result = tsExpr.transformPost {
    +        case rfr@ResolvedFieldReference(fieldName, typeInfo) =>
    +          ResolvedFieldReference(
    +            resolveFieldNameAndTableSource(inputOperation, 
fieldName).get._2,
    +            typeInfo
    +          )
    +      }
    +      // push down predicates
    +      if (result != NopExpression()) {
    +        ts.notifyPredicates(result)
    +      }
    +    }
    +    expr
    +  }
    +
    +  // 
----------------------------------------------------------------------------------------------
    +
    +  /**
    +   * @return all AdaptiveTableSources the given PlanNode contains
    +   */
    +  def getPushableTableSources(tree: PlanNode): Seq[AdaptiveTableSource] = 
tree match {
    +    case Root(ts: AdaptiveTableSource, _) => Seq(ts)
    +    case pn: PlanNode =>
    +      pn.children flatMap { child => getPushableTableSources(child ) }
    +    case _ => Seq() // add nothing
    +  }
    +
    +  /**
    +   * 
    +   * @return true if the given expression is a predicate that consists of a
    +   *         ResolvedFieldReference and a constant/literal
    +   *         e.g. a=1, 2<c
    +   */
    +  def isResolvedAndConstant(expr: Expression) : Boolean = {
    +    expr match {
    +      case bc: BinaryComparison if bc.left.isInstanceOf[Literal]
    +          && bc.right.isInstanceOf[ResolvedFieldReference] =>
    +        true
    +      case bc: BinaryComparison if bc.right.isInstanceOf[Literal]
    +          && bc.left.isInstanceOf[ResolvedFieldReference] =>
    +        true
    +      case ue@(IsNotNull(_) | IsNull(_)) =>
    +        val child = ue.asInstanceOf[UnaryExpression].child
    +        child.isInstanceOf[ResolvedFieldReference]
    +      case And(_,_) | Or(_,_) | Not(_) =>
    +        true
    +      case _ => false
    +    }
    +  }
    +
    +  /**
    +   * @return true if the given expression only consists of ResolvedFieldReferences
    +   *         belonging to the same given AdaptiveTableSource
    +   */
    +  def isSameTableSource(expr: Expression, ts: AdaptiveTableSource) : 
Boolean = {
    +    expr match {
    +      case bc: BinaryComparison if 
bc.right.isInstanceOf[ResolvedFieldReference] =>
    +        val fieldRef = bc.right.asInstanceOf[ResolvedFieldReference]
    +        val resolvedField = resolveFieldNameAndTableSource(inputOperation, 
fieldRef.name)
    +        resolvedField.isDefined && resolvedField.get._1 == ts
    --- End diff --
    
    pattern matching is more scalaesque


> Add interfaces for Table API input formats
> ------------------------------------------
>
>                 Key: FLINK-2828
>                 URL: https://issues.apache.org/jira/browse/FLINK-2828
>             Project: Flink
>          Issue Type: New Feature
>          Components: Table API
>            Reporter: Timo Walther
>            Assignee: Timo Walther
>
> In order to support input formats for the Table API, interfaces are 
> necessary. I propose two types of TableSources:
> - AdaptiveTableSources can adapt their output to the requirements of the 
> plan. Although the output schema stays the same, the TableSource can react on 
> field resolution and/or predicates internally and can return adapted 
> DataSet/DataStream versions in the "translate" step.
> - StaticTableSources are an easy way to provide the Table API with additional 
> input formats without much implementation effort (e.g. for fromCsvFile())
> TableSources need to be deeply integrated into the Table API.
> The TableEnvironment requires a newly introduced AbstractExecutionEnvironment 
> (common super class of all ExecutionEnvironments for DataSets and 
> DataStreams).
> Here's what a TableSource can see from more complicated queries:
> {code}
> getTableJava(tableSource1)
>   .filter("a===5 || a===6")
>   .select("a as a4, b as b4, c as c4")
>   .filter("b4===7")
>   .join(getTableJava(tableSource2))
>   .where("a===a4 && c==='Test' && c4==='Test2'")
> // Result predicates for tableSource1:
> //  List("a===5 || a===6", "b===7", "c==='Test2'")
> // Result predicates for tableSource2:
> //  List("c==='Test'")
> // Result resolved fields for tableSource1 (true = filtering, 
> false=selection):
> //  Set(("a", true), ("a", false), ("b", true), ("b", false), ("c", false), 
> ("c", true))
> // Result resolved fields for tableSource2 (true = filtering, 
> false=selection):
> //  Set(("a", true), ("c", true))
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to