Repository: tajo Updated Branches: refs/heads/master aa8969ac8 -> 22d2ba4db
TAJO-1552: NPE occurs when GreedyHeuristicJoinOrderAlgorithm.getCost() returns infinity. Closes #655, closes #547 Signed-off-by: Jihoon Son <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/22d2ba4d Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/22d2ba4d Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/22d2ba4d Branch: refs/heads/master Commit: 22d2ba4db5c2623a1f8e77fb4028c923631f3536 Parents: aa8969a Author: Hyoungjun Kim <[email protected]> Authored: Wed Jul 29 18:39:19 2015 +0900 Committer: Jihoon Son <[email protected]> Committed: Wed Jul 29 18:41:19 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 + .../org/apache/tajo/catalog/CatalogService.java | 14 +- .../engine/planner/TestJoinOrderAlgorithm.java | 185 +++++++++++++++++++ .../org/apache/tajo/plan/LogicalPlanner.java | 4 +- .../GreedyHeuristicJoinOrderAlgorithm.java | 25 ++- 5 files changed, 214 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/22d2ba4d/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 91a8346..4632ddf 100644 --- a/CHANGES +++ b/CHANGES @@ -200,6 +200,9 @@ Release 0.11.0 - unreleased BUG FIXES + TAJO-1552: NPE occurs when GreedyHeuristicJoinOrderAlgorithm.getCost() returns + infinity. (Contributed by Hyoungjun Kim, Committed by jihoon) + TAJO-1712: querytasks.jsp throws NPE occasionally when tasks are running. (jinho) http://git-wip-us.apache.org/repos/asf/tajo/blob/22d2ba4d/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogService.java ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogService.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogService.java index 5dc5412..f1bfe6a 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogService.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogService.java @@ -22,23 +22,13 @@ import org.apache.tajo.catalog.exception.UndefinedFunctionException; import org.apache.tajo.catalog.exception.UndefinedPartitionException; import org.apache.tajo.catalog.partition.PartitionMethodDesc; import org.apache.tajo.catalog.proto.CatalogProtos; -import org.apache.tajo.catalog.proto.CatalogProtos.ColumnProto; -import org.apache.tajo.catalog.proto.CatalogProtos.DatabaseProto; -import org.apache.tajo.catalog.proto.CatalogProtos.IndexProto; -import org.apache.tajo.catalog.proto.CatalogProtos.TableDescriptorProto; -import org.apache.tajo.catalog.proto.CatalogProtos.TableOptionProto; -import org.apache.tajo.catalog.proto.CatalogProtos.TablePartitionProto; -import org.apache.tajo.catalog.proto.CatalogProtos.TableStatsProto; +import org.apache.tajo.catalog.proto.CatalogProtos.*; import org.apache.tajo.common.TajoDataTypes.DataType; -import java.sql.SQLException; import java.util.Collection; import java.util.List; -import static org.apache.tajo.catalog.proto.CatalogProtos.AlterTablespaceProto; -import static org.apache.tajo.catalog.proto.CatalogProtos.FunctionType; -import static org.apache.tajo.catalog.proto.CatalogProtos.TablespaceProto; -import static org.apache.tajo.catalog.proto.CatalogProtos.UpdateTableStatsProto; +import static org.apache.tajo.catalog.proto.CatalogProtos.*; public interface CatalogService { http://git-wip-us.apache.org/repos/asf/tajo/blob/22d2ba4d/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestJoinOrderAlgorithm.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestJoinOrderAlgorithm.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestJoinOrderAlgorithm.java new file mode 100644 index 0000000..eb4a3f4 --- /dev/null +++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/TestJoinOrderAlgorithm.java @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.planner; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.LocalTajoTestingUtility; +import org.apache.tajo.TajoConstants; +import org.apache.tajo.TajoTestingCluster; +import org.apache.tajo.algebra.Expr; +import org.apache.tajo.catalog.*; +import org.apache.tajo.catalog.proto.CatalogProtos.StoreType; +import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.engine.function.FunctionLoader; +import org.apache.tajo.engine.parser.SQLAnalyzer; +import org.apache.tajo.engine.query.QueryContext; +import org.apache.tajo.plan.LogicalOptimizer; +import org.apache.tajo.plan.LogicalPlan; +import org.apache.tajo.plan.LogicalPlanner; +import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.util.PlannerUtil; +import org.apache.tajo.storage.TablespaceManager; +import org.apache.tajo.unit.StorageUnit; +import org.apache.tajo.util.CommonTestingUtil; +import org.apache.tajo.util.KeyValueSet; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME; +import static org.apache.tajo.TajoConstants.DEFAULT_TABLESPACE_NAME; +import static org.junit.Assert.*; + +public class TestJoinOrderAlgorithm { + + private static TajoTestingCluster util; + private static CatalogService catalog; + private static SQLAnalyzer sqlAnalyzer; + private static LogicalPlanner planner; + private static LogicalOptimizer optimizer; + private static QueryContext defaultContext; + + @BeforeClass + public static void setUp() throws Exception { + util = new TajoTestingCluster(); + util.startCatalogCluster(); + catalog = util.getMiniCatalogCluster().getCatalog(); + catalog.createTablespace(DEFAULT_TABLESPACE_NAME, "hdfs://localhost:1234/warehouse"); + catalog.createDatabase(DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME); + for (FunctionDesc funcDesc : FunctionLoader.findLegacyFunctions()) { + catalog.createFunction(funcDesc); + } + + Schema schema = new Schema(); + schema.addColumn("name", Type.TEXT); + schema.addColumn("empid", Type.INT4); + schema.addColumn("deptname", Type.TEXT); + + Schema schema2 = new Schema(); + schema2.addColumn("deptname", Type.TEXT); + schema2.addColumn("manager", Type.TEXT); + + Schema schema3 = new Schema(); + schema3.addColumn("deptname", Type.TEXT); + schema3.addColumn("score", Type.INT4); + schema3.addColumn("phone", Type.INT4); + + TableMeta meta = CatalogUtil.newTableMeta("TEXT"); + TableDesc people = new TableDesc( + CatalogUtil.buildFQName(TajoConstants.DEFAULT_DATABASE_NAME, "employee"), schema, meta, + CommonTestingUtil.getTestDir().toUri()); + catalog.createTable(people); + + TableDesc student = + new TableDesc( + CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "dept"), schema2, "TEXT", new KeyValueSet(), + CommonTestingUtil.getTestDir().toUri()); + catalog.createTable(student); + + TableDesc score = + new TableDesc( + CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "score"), schema3, "TEXT", new KeyValueSet(), + CommonTestingUtil.getTestDir().toUri()); + catalog.createTable(score); + + /////////////////////////////////////////////////////////////////////////// + // creating table for overflow in JoinOrderOptimizer. + Schema schema4 = new Schema(); + schema4.addColumn("deptname", Type.TEXT); + schema4.addColumn("manager", Type.TEXT); + // Set store type as FAKEFILE to prevent auto update of physical information in LogicalPlanner.updatePhysicalInfo() + TableMeta largeTableMeta = CatalogUtil.newTableMeta("FAKEFILE"); + TableDesc largeDept; + TableStats largeTableStats; + FileSystem fs = FileSystem.getLocal(util.getConfiguration()); + for (int i = 0; i < 6; i++) { + Path tablePath = new Path(CommonTestingUtil.getTestDir(), "" + (i+1)); + fs.create(tablePath); + largeDept = + new TableDesc( + CatalogUtil.buildFQName(DEFAULT_DATABASE_NAME, "large_dept"+(i+1)), schema4, largeTableMeta, + tablePath.toUri()); + largeTableStats = new TableStats(); + largeTableStats.setNumBytes(StorageUnit.PB * (i+1)); //1 PB * i + largeDept.setStats(largeTableStats); + catalog.createTable(largeDept); + } + /////////////////////////////////////////////////////////////////////////// + + sqlAnalyzer = new SQLAnalyzer(); + planner = new LogicalPlanner(catalog, TablespaceManager.getInstance()); + optimizer = new LogicalOptimizer(util.getConfiguration()); + + defaultContext = LocalTajoTestingUtility.createDummyContext(util.getConfiguration()); + } + + @AfterClass + public static void tearDown() throws Exception { + util.shutdownCatalogCluster(); + } + + @Test + public final void testCheckingInfinityJoinScore() throws Exception { + // Test for TAJO-1552 + String query = "select a.deptname from large_dept1 a, large_dept2 b, large_dept3 c, " + + "large_dept4 d, large_dept5 e, large_dept6 f "; + + Expr expr = sqlAnalyzer.parse(query); + LogicalPlan newPlan = planner.createPlan(defaultContext, expr); + LogicalNode[] joinNodes = PlannerUtil.findAllNodes(newPlan. getRootBlock().getRoot() , NodeType.JOIN); + assertNotNull(joinNodes); + assertEquals(5, joinNodes.length); + assertJoinNode(joinNodes[0], "default.a", "default.b"); + assertJoinNode(joinNodes[1], null, "default.c"); + assertJoinNode(joinNodes[2], null, "default.d"); + assertJoinNode(joinNodes[3], null, "default.e"); + assertJoinNode(joinNodes[4], null, "default.f"); + + optimizer.optimize(newPlan); + + joinNodes = PlannerUtil.findAllNodes(newPlan. getRootBlock().getRoot() , NodeType.JOIN); + assertNotNull(joinNodes); + assertEquals(5, joinNodes.length); + assertJoinNode(joinNodes[0], "default.d", "default.c"); + assertJoinNode(joinNodes[1], "default.b", "default.a"); + assertJoinNode(joinNodes[2], null, null); + assertJoinNode(joinNodes[3], "default.f", "default.e"); + assertJoinNode(joinNodes[4], null, null); + + } + + private void assertJoinNode(LogicalNode node, String left, String right) { + assertEquals(NodeType.JOIN, node.getType()); + JoinNode joinNode = (JoinNode)node; + + if (left != null) { + assertEquals(left, ((ScanNode)joinNode.getLeftChild()).getCanonicalName()); + } else { + assertEquals(NodeType.JOIN, joinNode.getLeftChild().getType()); + } + + if (right != null) { + assertEquals(right, ((ScanNode)joinNode.getRightChild()).getCanonicalName()); + } else { + assertEquals(NodeType.JOIN, joinNode.getRightChild().getType()); + } + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/22d2ba4d/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index 47ab9b1..7f5cf9a 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -1357,7 +1357,9 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex private void updatePhysicalInfo(PlanContext planContext, TableDesc desc) { if (desc.getUri() != null && - desc.getMeta().getStoreType() != "SYSTEM" && PlannerUtil.isFileStorageType(desc.getMeta().getStoreType())) { + !desc.getMeta().getStoreType().equals("SYSTEM") && + !desc.getMeta().getStoreType().equals("FAKEFILE") && // FAKEFILE is used for test + PlannerUtil.isFileStorageType(desc.getMeta().getStoreType())) { try { Path path = new Path(desc.getUri()); FileSystem fs = path.getFileSystem(planContext.queryContext.getConf()); http://git-wip-us.apache.org/repos/asf/tajo/blob/22d2ba4d/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java index c1a8e85..403d0b8 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java @@ -203,7 +203,8 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { JoinOrderingUtil.updateQualIfNecessary(graphContext, foundJoin); double cost = getCost(foundJoin); - if (cost < minCost) { + if (cost < minCost || + (cost == minCost && cost == Double.MAX_VALUE)) { minCost = cost; bestJoin = foundJoin; } @@ -236,7 +237,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { } private static JoinEdge swapLeftAndRightIfNecessary(JoinEdge edge) { - if (PlannerUtil.isCommutativeJoinType(edge.getJoinType()) || edge.getJoinType() == JoinType.FULL_OUTER) { + if (PlannerUtil.isCommutativeJoinType(edge.getJoinType())) { double leftCost = getCost(edge.getLeftVertex()); double rightCost = getCost(edge.getRightVertex()); if (leftCost < rightCost) { @@ -398,7 +399,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { getCost(joinEdge.getRightVertex()), 2); } - return cost * COMPUTATION_FACTOR; + return checkInfinity(cost * COMPUTATION_FACTOR); } public static double getCost(JoinVertex joinVertex) { @@ -411,6 +412,22 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { return cost; } + /** + * Return the MAX(MIN) value if the given cost is positive(negative) infinity. + * + * @param cost + * @return + */ + private static double checkInfinity(double cost) { + if (cost == Double.POSITIVE_INFINITY) { + return Long.MAX_VALUE; + } else if (cost == Double.NEGATIVE_INFINITY) { + return Long.MIN_VALUE; + } else { + return cost; + } + } + // TODO - costs of other operator operators (e.g., group-by and sort) should be computed in proper manners. public static double getCost(LogicalNode node) { double cost; @@ -449,7 +466,7 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { if (scanNode.getTableDesc().getStats() != null) { cost = ((ScanNode)node).getTableDesc().getStats().getNumBytes(); } else { - cost = Long.MAX_VALUE; + cost = Integer.MAX_VALUE; } break;
