[
https://issues.apache.org/jira/browse/TAJO-1597?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14639092#comment-14639092
]
ASF GitHub Bot commented on TAJO-1597:
--------------------------------------
Github user hyunsik commented on a diff in the pull request:
https://github.com/apache/tajo/pull/643#discussion_r35341281
--- Diff:
tajo-plan/src/main/java/org/apache/tajo/plan/algebra/BaseAlgebraVisitor.java ---
@@ -52,221 +52,221 @@ public RESULT visit(CONTEXT ctx, Stack<Expr> stack,
Expr expr) throws TajoExcept
RESULT current;
switch (expr.getType()) {
- case SetSession:
- current = visitSetSession(ctx, stack, (SetSession) expr);
- break;
-
- case Projection:
- current = visitProjection(ctx, stack, (Projection) expr);
- break;
- case Limit:
- current = visitLimit(ctx, stack, (Limit) expr);
- break;
- case Sort:
- current = visitSort(ctx, stack, (Sort) expr);
- break;
- case Having:
- current = visitHaving(ctx, stack, (Having) expr);
- break;
- case Aggregation:
- current = visitGroupBy(ctx, stack, (Aggregation) expr);
- break;
- case Join:
- current = visitJoin(ctx, stack, (Join) expr);
- break;
- case Filter:
- current = visitFilter(ctx, stack, (Selection) expr);
- break;
- case Union:
- current = visitUnion(ctx, stack, (SetOperation) expr);
- break;
- case Except:
- current = visitExcept(ctx, stack, (SetOperation) expr);
- break;
- case Intersect:
- current = visitIntersect(ctx, stack, (SetOperation) expr);
- break;
- case SimpleTableSubQuery:
- current = visitSimpleTableSubQuery(ctx, stack, (SimpleTableSubQuery)
expr);
- break;
- case TablePrimaryTableSubQuery:
- current = visitTableSubQuery(ctx, stack, (TablePrimarySubQuery)
expr);
- break;
- case RelationList:
- current = visitRelationList(ctx, stack, (RelationList) expr);
- break;
- case Relation:
- current = visitRelation(ctx, stack, (Relation) expr);
- break;
- case ScalarSubQuery:
- current = visitScalarSubQuery(ctx, stack, (ScalarSubQuery) expr);
- break;
- case Explain:
- current = visitExplain(ctx, stack, (Explain) expr);
- break;
-
- case CreateDatabase:
- current = visitCreateDatabase(ctx, stack, (CreateDatabase) expr);
- break;
- case DropDatabase:
- current = visitDropDatabase(ctx, stack, (DropDatabase) expr);
- break;
- case CreateTable:
- current = visitCreateTable(ctx, stack, (CreateTable) expr);
- break;
- case DropTable:
- current = visitDropTable(ctx, stack, (DropTable) expr);
- break;
- case AlterTablespace:
- current = visitAlterTablespace(ctx, stack, (AlterTablespace) expr);
- break;
- case AlterTable:
- current = visitAlterTable(ctx, stack, (AlterTable) expr);
- break;
- case TruncateTable:
- current = visitTruncateTable(ctx, stack, (TruncateTable)expr);
- break;
-
- case Insert:
- current = visitInsert(ctx, stack, (Insert) expr);
- break;
-
- case And:
- current = visitAnd(ctx, stack, (BinaryOperator) expr);
- break;
- case Or:
- current = visitOr(ctx, stack, (BinaryOperator) expr);
- break;
- case Not:
- current = visitNot(ctx, stack, (NotExpr) expr);
- break;
-
- case Equals:
- current = visitEquals(ctx, stack, (BinaryOperator) expr);
- break;
- case NotEquals:
- current = visitNotEquals(ctx, stack, (BinaryOperator) expr);
- break;
- case LessThan:
- current = visitLessThan(ctx, stack, (BinaryOperator) expr);
- break;
- case LessThanOrEquals:
- current = visitLessThanOrEquals(ctx, stack, (BinaryOperator) expr);
- break;
- case GreaterThan:
- current = visitGreaterThan(ctx, stack, (BinaryOperator) expr);
- break;
- case GreaterThanOrEquals:
- current = visitGreaterThanOrEquals(ctx, stack, (BinaryOperator)
expr);
- break;
-
- // Other Predicates
- case Between:
- current = visitBetween(ctx, stack, (BetweenPredicate) expr);
- break;
- case CaseWhen:
- current = visitCaseWhen(ctx, stack, (CaseWhenPredicate) expr);
- break;
- case IsNullPredicate:
- current = visitIsNullPredicate(ctx, stack, (IsNullPredicate) expr);
- break;
- case InPredicate:
- current = visitInPredicate(ctx, stack, (InPredicate) expr);
- break;
- case ValueList:
- current = visitValueListExpr(ctx, stack, (ValueListExpr) expr);
- break;
- case ExistsPredicate:
- current = visitExistsPredicate(ctx, stack, (ExistsPredicate) expr);
- break;
-
- // String Operator or Pattern Matching Predicates
- case LikePredicate:
- current = visitLikePredicate(ctx, stack, (PatternMatchPredicate)
expr);
- break;
- case SimilarToPredicate:
- current = visitSimilarToPredicate(ctx, stack,
(PatternMatchPredicate) expr);
- break;
- case Regexp:
- current = visitRegexpPredicate(ctx, stack, (PatternMatchPredicate)
expr);
- break;
- case Concatenate:
- current = visitConcatenate(ctx, stack, (BinaryOperator) expr);
- break;
-
- // Arithmetic Operators
- case Plus:
- current = visitPlus(ctx, stack, (BinaryOperator) expr);
- break;
- case Minus:
- current = visitMinus(ctx, stack, (BinaryOperator) expr);
- break;
- case Multiply:
- current = visitMultiply(ctx, stack, (BinaryOperator) expr);
- break;
- case Divide:
- current = visitDivide(ctx, stack, (BinaryOperator) expr);
- break;
- case Modular:
- current = visitModular(ctx, stack, (BinaryOperator) expr);
- break;
-
- // Other Expressions
- case Sign:
- current = visitSign(ctx, stack, (SignedExpr) expr);
- break;
- case Column:
- current = visitColumnReference(ctx, stack, (ColumnReferenceExpr)
expr);
- break;
- case Target:
- current = visitTargetExpr(ctx, stack, (NamedExpr) expr);
- break;
- case Function:
- current = visitFunction(ctx, stack, (FunctionExpr) expr);
- break;
- case Asterisk:
- current = visitQualifiedAsterisk(ctx, stack, (QualifiedAsteriskExpr)
expr);
- break;
-
-
- case CountRowsFunction:
- current = visitCountRowsFunction(ctx, stack, (CountRowsFunctionExpr)
expr);
- break;
- case GeneralSetFunction:
- current = visitGeneralSetFunction(ctx, stack,
(GeneralSetFunctionExpr) expr);
- break;
- case WindowFunction:
- current = visitWindowFunction(ctx, stack, (WindowFunctionExpr) expr);
- break;
-
-
- case DataType:
- current = visitDataType(ctx, stack, (DataTypeExpr) expr);
- break;
- case Cast:
- current = visitCastExpr(ctx, stack, (CastExpr) expr);
- break;
- case Literal:
- current = visitLiteral(ctx, stack, (LiteralValue) expr);
- break;
- case NullLiteral:
- current = visitNullLiteral(ctx, stack, (NullLiteral) expr);
- break;
- case DateLiteral:
- current = visitDateLiteral(ctx, stack, (DateLiteral) expr);
- break;
- case TimeLiteral:
- current = visitTimeLiteral(ctx, stack, (TimeLiteral) expr);
- break;
- case TimestampLiteral:
- current = visitTimestampLiteral(ctx, stack, (TimestampLiteral) expr);
- break;
- case IntervalLiteral:
- current = visitIntervalLiteral(ctx, stack, (IntervalLiteral) expr);
- break;
-
- default:
- throw new TajoInternalError("Cannot support this type algebra \"" +
expr.getType() + "\"");
+ case SetSession:
--- End diff --
The wrong coding format seems to have been applied. {{switch}} and {{case}}
should be at the same column position.
> Problem of ignoring theta join condition
> ----------------------------------------
>
> Key: TAJO-1597
> URL: https://issues.apache.org/jira/browse/TAJO-1597
> Project: Tajo
> Issue Type: Bug
> Components: Planner/Optimizer
> Reporter: Jihoon Son
> Assignee: Jihoon Son
> Fix For: 0.11.0
>
>
> Tajo currently does not support theta joins, so non-equi theta join
> conditions must be evaluated as a filter after join execution. However, when
> non-equi theta join conditions are included in ON clauses, those conditions
> disappear after the filter push down optimization.
> This is because we assume that filters are pushed down from the top during
> the FPD phase, but theta join conditions from the ON clause are not.
> For example, consider a query that contains a projection after a join,
> as follows.
> {noformat}
> projection
> |
> join (contains a theta join condition)
> / \
> scan scan
> {noformat}
> During FPD optimization, the theta join condition is brought up to the
> projection node (FilterPushDownRule.visitProjection()). This condition is
> converted based on the information in transformedMap, which is created by
> findCanPushdownAndTransform() before visiting the join node. Obviously,
> transformedMap contains no information about the theta join condition, so
> the condition is ignored.
> You can reproduce this bug as follows.
> {noformat}
> default> select n_nationkey, n_name, n_regionkey, t.cnt from nation n join (
> select r_regionkey, count(*) as cnt from nation n join region r on
> (n.n_regionkey = r.r_regionkey) group by r_regionkey ) t on
> (n.n_regionkey = t.r_regionkey) and n.n_nationkey > t.cnt order by
> n_nationkey;
> ...
> -----------------------------
> Query Block Graph
> -----------------------------
> |-#ROOT
> |-#QB_0
> -----------------------------
> Optimization Log:
> [LogicalPlan]
> > ProjectionNode is eliminated.
> > ProjectionNode is eliminated.
> [#QB_0]
> > Non-optimized join order: (default.nation ⋈θ default.region) (cost:
> 86513.6)
> > Optimized join order : (default.nation ⋈θ default.region) (cost:
> 86513.6)
> [#ROOT]
> > Non-optimized join order: (default.nation ⋈θ default.t) (cost:
> 1.924062464E7)
> > Optimized join order : (default.nation ⋈θ default.t) (cost:
> 1.924062464E7)
> -----------------------------
> SORT(8)
> => Sort Keys: default.n.n_nationkey (INT4) (asc)
> JOIN(12)(INNER)
> => Join Cond: default.n.n_regionkey (INT4) = default.t.r_regionkey (INT4)
> => target list: default.n.n_nationkey (INT4), default.n.n_name (TEXT),
> default.n.n_regionkey (INT4), default.t.cnt (INT8)
> => out schema: {(4) default.n.n_nationkey (INT4), default.n.n_name
> (TEXT), default.n.n_regionkey (INT4), default.t.cnt (INT8)}
> => in schema: {(5) default.n.n_nationkey (INT4), default.n.n_name
> (TEXT), default.n.n_regionkey (INT4), default.t.cnt (INT8),
> default.t.r_regionkey (INT4)}
> TABLE_SUBQUERY(6) as default.t
> => Targets: default.t.cnt (INT8), default.t.r_regionkey (INT4)
> => out schema: {(2) default.t.cnt (INT8), default.t.r_regionkey
> (INT4)}
> => in schema: {(2) default.t.r_regionkey (INT4), default.t.cnt
> (INT8)}
> GROUP_BY(4)(r_regionkey)
> => exprs: (count())
> => target list: default.r.r_regionkey (INT4), cnt (INT8)
> => out schema:{(2) default.r.r_regionkey (INT4), cnt (INT8)}
> => in schema:{(4) default.r.r_regionkey (INT4),
> default.n.n_regionkey (INT4), default.n.n_nationkey (INT4), default.n.n_name
> (TEXT)}
> JOIN(11)(INNER)
> => Join Cond: default.n.n_regionkey (INT4) =
> default.r.r_regionkey (INT4)
> => target list: default.r.r_regionkey (INT4),
> default.n.n_regionkey (INT4), default.n.n_nationkey (INT4), default.n.n_name
> (TEXT)
> => out schema: {(4) default.r.r_regionkey (INT4),
> default.n.n_regionkey (INT4), default.n.n_nationkey (INT4), default.n.n_name
> (TEXT)}
> => in schema: {(4) default.n.n_regionkey (INT4),
> default.n.n_nationkey (INT4), default.n.n_name (TEXT), default.r.r_regionkey
> (INT4)}
> SCAN(2) on default.region as r
> => target list: default.r.r_regionkey (INT4)
> => out schema: {(1) default.r.r_regionkey (INT4)}
> => in schema: {(3) default.r.r_regionkey (INT4),
> default.r.r_name (TEXT), default.r.r_comment (TEXT)}
> SCAN(1) on default.nation as n
> => target list: default.n.n_regionkey (INT4),
> default.n.n_nationkey (INT4), default.n.n_name (TEXT)
> => out schema: {(3) default.n.n_regionkey (INT4),
> default.n.n_nationkey (INT4), default.n.n_name (TEXT)}
> => in schema: {(4) default.n.n_nationkey (INT4),
> default.n.n_name (TEXT), default.n.n_regionkey (INT4), default.n.n_comment
> (TEXT)}
> SCAN(0) on default.nation as n
> => target list: default.n.n_nationkey (INT4), default.n.n_name
> (TEXT), default.n.n_regionkey (INT4)
> => out schema: {(3) default.n.n_nationkey (INT4), default.n.n_name
> (TEXT), default.n.n_regionkey (INT4)}
> => in schema: {(4) default.n.n_nationkey (INT4), default.n.n_name
> (TEXT), default.n.n_regionkey (INT4), default.n.n_comment (TEXT)}
> ...
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)