[ 
https://issues.apache.org/jira/browse/TAJO-1597?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14639092#comment-14639092
 ] 

ASF GitHub Bot commented on TAJO-1597:
--------------------------------------

Github user hyunsik commented on a diff in the pull request:

    https://github.com/apache/tajo/pull/643#discussion_r35341281
  
    --- Diff: 
tajo-plan/src/main/java/org/apache/tajo/plan/algebra/BaseAlgebraVisitor.java ---
    @@ -52,221 +52,221 @@ public RESULT visit(CONTEXT ctx, Stack<Expr> stack, 
Expr expr) throws TajoExcept
         RESULT current;
     
         switch (expr.getType()) {
    -    case SetSession:
    -      current = visitSetSession(ctx, stack, (SetSession) expr);
    -      break;
    -
    -    case Projection:
    -      current = visitProjection(ctx, stack, (Projection) expr);
    -      break;
    -    case Limit:
    -      current = visitLimit(ctx, stack, (Limit) expr);
    -      break;
    -    case Sort:
    -      current = visitSort(ctx, stack, (Sort) expr);
    -      break;
    -    case Having:
    -      current = visitHaving(ctx, stack, (Having) expr);
    -      break;
    -    case Aggregation:
    -      current = visitGroupBy(ctx, stack, (Aggregation) expr);
    -      break;
    -    case Join:
    -      current = visitJoin(ctx, stack, (Join) expr);
    -      break;
    -    case Filter:
    -      current = visitFilter(ctx, stack, (Selection) expr);
    -      break;
    -    case Union:
    -      current = visitUnion(ctx, stack, (SetOperation) expr);
    -      break;
    -    case Except:
    -      current = visitExcept(ctx, stack, (SetOperation) expr);
    -      break;
    -    case Intersect:
    -      current = visitIntersect(ctx, stack, (SetOperation) expr);
    -      break;
    -    case SimpleTableSubQuery:
    -      current = visitSimpleTableSubQuery(ctx, stack, (SimpleTableSubQuery) 
expr);
    -      break;
    -    case TablePrimaryTableSubQuery:
    -      current = visitTableSubQuery(ctx, stack, (TablePrimarySubQuery) 
expr);
    -      break;
    -    case RelationList:
    -      current = visitRelationList(ctx, stack, (RelationList) expr);
    -      break;
    -    case Relation:
    -      current = visitRelation(ctx, stack, (Relation) expr);
    -      break;
    -    case ScalarSubQuery:
    -      current = visitScalarSubQuery(ctx, stack, (ScalarSubQuery) expr);
    -      break;
    -    case Explain:
    -      current = visitExplain(ctx, stack, (Explain) expr);
    -      break;
    -
    -    case CreateDatabase:
    -      current = visitCreateDatabase(ctx, stack, (CreateDatabase) expr);
    -      break;
    -    case DropDatabase:
    -      current = visitDropDatabase(ctx, stack, (DropDatabase) expr);
    -      break;
    -    case CreateTable:
    -      current = visitCreateTable(ctx, stack, (CreateTable) expr);
    -      break;
    -    case DropTable:
    -      current = visitDropTable(ctx, stack, (DropTable) expr);
    -      break;
    -    case AlterTablespace:
    -      current = visitAlterTablespace(ctx, stack, (AlterTablespace) expr);
    -      break;
    -    case AlterTable:
    -      current = visitAlterTable(ctx, stack, (AlterTable) expr);
    -      break;
    -    case TruncateTable:
    -      current = visitTruncateTable(ctx, stack, (TruncateTable)expr);
    -      break;
    -
    -    case Insert:
    -      current = visitInsert(ctx, stack, (Insert) expr);
    -      break;
    -
    -    case And:
    -      current = visitAnd(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case Or:
    -      current = visitOr(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case Not:
    -      current = visitNot(ctx, stack, (NotExpr) expr);
    -      break;
    -
    -    case Equals:
    -      current = visitEquals(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case NotEquals:
    -      current = visitNotEquals(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case LessThan:
    -      current = visitLessThan(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case LessThanOrEquals:
    -      current = visitLessThanOrEquals(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case GreaterThan:
    -      current = visitGreaterThan(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case GreaterThanOrEquals:
    -      current = visitGreaterThanOrEquals(ctx, stack, (BinaryOperator) 
expr);
    -      break;
    -
    -    // Other Predicates
    -    case Between:
    -      current = visitBetween(ctx, stack, (BetweenPredicate) expr);
    -      break;
    -    case CaseWhen:
    -      current = visitCaseWhen(ctx, stack, (CaseWhenPredicate) expr);
    -      break;
    -    case IsNullPredicate:
    -      current = visitIsNullPredicate(ctx, stack, (IsNullPredicate) expr);
    -      break;
    -    case InPredicate:
    -      current = visitInPredicate(ctx, stack, (InPredicate) expr);
    -      break;
    -    case ValueList:
    -      current = visitValueListExpr(ctx, stack, (ValueListExpr) expr);
    -      break;
    -    case ExistsPredicate:
    -      current = visitExistsPredicate(ctx, stack, (ExistsPredicate) expr);
    -      break;
    -
    -    // String Operator or Pattern Matching Predicates
    -    case LikePredicate:
    -      current = visitLikePredicate(ctx, stack, (PatternMatchPredicate) 
expr);
    -      break;
    -    case SimilarToPredicate:
    -      current = visitSimilarToPredicate(ctx, stack, 
(PatternMatchPredicate) expr);
    -      break;
    -    case Regexp:
    -      current = visitRegexpPredicate(ctx, stack, (PatternMatchPredicate) 
expr);
    -      break;
    -    case Concatenate:
    -      current = visitConcatenate(ctx, stack, (BinaryOperator) expr);
    -      break;
    -
    -    // Arithmetic Operators
    -    case Plus:
    -      current = visitPlus(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case Minus:
    -      current = visitMinus(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case Multiply:
    -      current = visitMultiply(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case Divide:
    -      current = visitDivide(ctx, stack, (BinaryOperator) expr);
    -      break;
    -    case Modular:
    -      current = visitModular(ctx, stack, (BinaryOperator) expr);
    -      break;
    -
    -    // Other Expressions
    -    case Sign:
    -      current = visitSign(ctx, stack, (SignedExpr) expr);
    -      break;
    -    case Column:
    -      current = visitColumnReference(ctx, stack, (ColumnReferenceExpr) 
expr);
    -      break;
    -    case Target:
    -      current = visitTargetExpr(ctx, stack, (NamedExpr) expr);
    -      break;
    -    case Function:
    -      current = visitFunction(ctx, stack, (FunctionExpr) expr);
    -      break;
    -    case Asterisk:
    -      current = visitQualifiedAsterisk(ctx, stack, (QualifiedAsteriskExpr) 
expr);
    -      break;
    -
    -
    -    case CountRowsFunction:
    -      current = visitCountRowsFunction(ctx, stack, (CountRowsFunctionExpr) 
expr);
    -      break;
    -    case GeneralSetFunction:
    -      current = visitGeneralSetFunction(ctx, stack, 
(GeneralSetFunctionExpr) expr);
    -      break;
    -    case WindowFunction:
    -      current = visitWindowFunction(ctx, stack, (WindowFunctionExpr) expr);
    -      break;
    -
    -
    -    case DataType:
    -      current = visitDataType(ctx, stack, (DataTypeExpr) expr);
    -      break;
    -    case Cast:
    -      current = visitCastExpr(ctx, stack, (CastExpr) expr);
    -      break;
    -    case Literal:
    -      current = visitLiteral(ctx, stack, (LiteralValue) expr);
    -      break;
    -    case NullLiteral:
    -      current = visitNullLiteral(ctx, stack, (NullLiteral) expr);
    -      break;
    -    case DateLiteral:
    -      current = visitDateLiteral(ctx, stack, (DateLiteral) expr);
    -      break;
    -    case TimeLiteral:
    -      current = visitTimeLiteral(ctx, stack, (TimeLiteral) expr);
    -      break;
    -    case TimestampLiteral:
    -      current = visitTimestampLiteral(ctx, stack, (TimestampLiteral) expr);
    -      break;
    -    case IntervalLiteral:
    -      current = visitIntervalLiteral(ctx, stack, (IntervalLiteral) expr);
    -      break;
    -
    -    default:
    -      throw new TajoInternalError("Cannot support this type algebra \"" + 
expr.getType() + "\"");
    +      case SetSession:
    --- End diff --
    
    The wrong coding format seems to have been applied. {{switch}} and {{case}} 
should be at the same column position.


> Problem of ignoring theta join condition
> ----------------------------------------
>
>                 Key: TAJO-1597
>                 URL: https://issues.apache.org/jira/browse/TAJO-1597
>             Project: Tajo
>          Issue Type: Bug
>          Components: Planner/Optimizer
>            Reporter: Jihoon Son
>            Assignee: Jihoon Son
>             Fix For: 0.11.0
>
>
> Tajo currently does not support theta joins, so non-equi theta join 
> conditions must be evaluated as a filter after join execution. However, when 
> non-equi theta join conditions are included in on clauses, those conditions 
> disappear after filter push down optimization.
> This is because we assume that filters are pushed down from the top during 
> the FPD phase, but theta join conditions from the on clause are not.
> For example, consider a query that contains a projection after a join 
> as follows.
> {noformat}
> projection
>        |
>      join (contains a theta join condition)
>     /     \
> scan scan
> {noformat}
> During FPD optimization, the theta join condition comes up to the 
> projection node (FilterPushDownRule.visitProjection()). This condition is 
> converted based on the information in transformedMap, which is created by 
> findCanPushdownAndTransform() before visiting the join node. Obviously, no 
> information about the theta join condition is contained in transformedMap, 
> so the condition is ignored.
> You can reproduce this bug as follows.
> {noformat}
> default> select n_nationkey, n_name, n_regionkey, t.cnt from nation n join (  
>  select r_regionkey, count(*) as cnt   from nation n   join region r on 
> (n.n_regionkey = r.r_regionkey)   group by r_regionkey ) t  on  
> (n.n_regionkey = t.r_regionkey) and n.n_nationkey > t.cnt  order by 
> n_nationkey;
> ...
> -----------------------------
> Query Block Graph
> -----------------------------
> |-#ROOT
>    |-#QB_0
> -----------------------------
> Optimization Log:
> [LogicalPlan]
>       > ProjectionNode is eliminated.
>       > ProjectionNode is eliminated.
> [#QB_0]
>       > Non-optimized join order: (default.nation ⋈θ default.region) (cost: 
> 86513.6)
>       > Optimized join order    : (default.nation ⋈θ default.region) (cost: 
> 86513.6)
> [#ROOT]
>       > Non-optimized join order: (default.nation ⋈θ default.t) (cost: 
> 1.924062464E7)
>       > Optimized join order    : (default.nation ⋈θ default.t) (cost: 
> 1.924062464E7)
> -----------------------------
> SORT(8)
>   => Sort Keys: default.n.n_nationkey (INT4) (asc)
>    JOIN(12)(INNER)
>      => Join Cond: default.n.n_regionkey (INT4) = default.t.r_regionkey (INT4)
>      => target list: default.n.n_nationkey (INT4), default.n.n_name (TEXT), 
> default.n.n_regionkey (INT4), default.t.cnt (INT8)
>      => out schema: {(4) default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT), default.n.n_regionkey (INT4), default.t.cnt (INT8)}
>      => in schema: {(5) default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT), default.n.n_regionkey (INT4), default.t.cnt (INT8), 
> default.t.r_regionkey (INT4)}
>       TABLE_SUBQUERY(6) as default.t
>         => Targets: default.t.cnt (INT8), default.t.r_regionkey (INT4)
>         => out schema: {(2) default.t.cnt (INT8), default.t.r_regionkey 
> (INT4)}
>         => in  schema: {(2) default.t.r_regionkey (INT4), default.t.cnt 
> (INT8)}
>          GROUP_BY(4)(r_regionkey)
>            => exprs: (count())
>            => target list: default.r.r_regionkey (INT4), cnt (INT8)
>            => out schema:{(2) default.r.r_regionkey (INT4), cnt (INT8)}
>            => in schema:{(4) default.r.r_regionkey (INT4), 
> default.n.n_regionkey (INT4), default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT)}
>             JOIN(11)(INNER)
>               => Join Cond: default.n.n_regionkey (INT4) = 
> default.r.r_regionkey (INT4)
>               => target list: default.r.r_regionkey (INT4), 
> default.n.n_regionkey (INT4), default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT)
>               => out schema: {(4) default.r.r_regionkey (INT4), 
> default.n.n_regionkey (INT4), default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT)}
>               => in schema: {(4) default.n.n_regionkey (INT4), 
> default.n.n_nationkey (INT4), default.n.n_name (TEXT), default.r.r_regionkey 
> (INT4)}
>                SCAN(2) on default.region as r
>                  => target list: default.r.r_regionkey (INT4)
>                  => out schema: {(1) default.r.r_regionkey (INT4)}
>                  => in schema: {(3) default.r.r_regionkey (INT4), 
> default.r.r_name (TEXT), default.r.r_comment (TEXT)}
>                SCAN(1) on default.nation as n
>                  => target list: default.n.n_regionkey (INT4), 
> default.n.n_nationkey (INT4), default.n.n_name (TEXT)
>                  => out schema: {(3) default.n.n_regionkey (INT4), 
> default.n.n_nationkey (INT4), default.n.n_name (TEXT)}
>                  => in schema: {(4) default.n.n_nationkey (INT4), 
> default.n.n_name (TEXT), default.n.n_regionkey (INT4), default.n.n_comment 
> (TEXT)}
>       SCAN(0) on default.nation as n
>         => target list: default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT), default.n.n_regionkey (INT4)
>         => out schema: {(3) default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT), default.n.n_regionkey (INT4)}
>         => in schema: {(4) default.n.n_nationkey (INT4), default.n.n_name 
> (TEXT), default.n.n_regionkey (INT4), default.n.n_comment (TEXT)}
> ...
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to