[
https://issues.apache.org/jira/browse/TAJO-1991?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15039578#comment-15039578
]
ASF GitHub Bot commented on TAJO-1991:
--------------------------------------
Github user jinossy commented on a diff in the pull request:
https://github.com/apache/tajo/pull/901#discussion_r46641076
--- Diff:
tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/TableStatUpdateRewriter.java
---
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.plan.rewrite;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.tajo.OverridableConf;
+import org.apache.tajo.SessionVars;
+import org.apache.tajo.catalog.TableDesc;
+import org.apache.tajo.catalog.statistics.TableStats;
+import org.apache.tajo.exception.TajoException;
+import org.apache.tajo.exception.UnsupportedException;
+import org.apache.tajo.plan.LogicalPlan;
+import org.apache.tajo.plan.StorageService;
+import org.apache.tajo.plan.expr.EvalNode;
+import org.apache.tajo.plan.logical.LogicalNode;
+import org.apache.tajo.plan.logical.ScanNode;
+import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor;
+import org.apache.tajo.unit.StorageUnit;
+
+import java.util.Optional;
+import java.util.Stack;
+
+public class TableStatUpdateRewriter implements LogicalPlanRewriteRule {
+ private static final Log LOG =
LogFactory.getLog(TableStatUpdateRewriter.class);
+
+ private static final String NAME = "Table Stat Updater";
+
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ @Override
+ public boolean isEligible(LogicalPlanRewriteRuleContext context) {
+ return true;
+ }
+
+ @Override
+ public LogicalPlan rewrite(LogicalPlanRewriteRuleContext context) throws
TajoException {
+ LogicalPlan plan = context.getPlan();
+ LogicalPlan.QueryBlock rootBlock = plan.getRootBlock();
+
+ Rewriter r = new Rewriter(context.getQueryContext(),
context.getStorage());
+ r.visit(rootBlock, plan, rootBlock, rootBlock.getRoot(), new
Stack<>());
+ return plan;
+ }
+
+ private final class Rewriter extends BasicLogicalPlanVisitor<Object,
Object> {
+ private final OverridableConf conf;
+ private final StorageService storage;
+
+
+ private Rewriter(OverridableConf conf, StorageService storage) {
+ this.conf = conf;
+ this.storage = storage;
+ }
+
+ @Override
+ public Object visitScan(Object object, LogicalPlan plan,
LogicalPlan.QueryBlock block, ScanNode scanNode,
+ Stack<LogicalNode> stack) throws TajoException
{
+ final TableDesc table = scanNode.getTableDesc();
+
+ if (!isVirtual(table)) {
+ final TableStats stats = getTableStat(table);
+ final long tableSize = stats.getNumBytes();
+ final Optional<EvalNode> filter = scanNode.hasQual() ?
Optional.of(scanNode.getQual()) : Optional.empty();
+
+ // If USE_TABLE_VOLUME is set, we will update the table volume
through a storage handler.
+ // In addition, if the table size is zero, we will update too.
+ // It is a good workaround to avoid suboptimal join orders without
cheap cost.
+ if (conf.getBool(SessionVars.USE_TABLE_VOLUME) || tableSize == 0) {
+ table.getStats().setNumBytes(getTableVolume(table, filter));
+ }
+ }
+
+ return scanNode;
+ }
+
+ private TableStats getTableStat(TableDesc table) {
+ TableStats stats;
+ if (table.getStats() == null) {
+ stats = new TableStats();
+ table.setStats(stats);
+ } else {
+ stats = table.getStats();
+ }
+ return stats;
+ }
+
+ private boolean isVirtual(TableDesc table) {
+ return table.getMeta().getDataFormat().equals("SYSTEM") ||
table.getMeta().getDataFormat().equals("FAKEFILE");
+ }
+
+ private long getTableVolume(TableDesc table, Optional<EvalNode>
filter) {
+ try {
+ if (table.getStats() != null) {
+ return storage.getTableVolumn(table.getUri(), filter);
+ }
+ } catch (UnsupportedException t) {
+ LOG.warn(table.getName() + " does not support
Tablespace::getTableVolume()");
+ // By default, return 1GB to avoid a single task
+ }
+
+ return StorageUnit.GB;
--- End diff --
How about the configurable value ?
> Tablespace::getVolume should take filter predication
> ----------------------------------------------------
>
> Key: TAJO-1991
> URL: https://issues.apache.org/jira/browse/TAJO-1991
> Project: Tajo
> Issue Type: Improvement
> Components: Storage
> Reporter: Hyunsik Choi
> Assignee: Hyunsik Choi
> Fix For: 0.12.0, 0.11.1
>
>
> {{Tablespace::getVolume}} plays a role to guess or compute exactly the table
> volume. This volume result is usually used for join optimization. But,
> currently {{Tablespace::getVolume}} just returns whole table volume. It may
> cause sub optimal join order, especially in partitioned tables or indexable
> tables like HBase.
> The main objective of this patch is to improve {{getVolume}} method to take
> filter predicates and return more reasonable volume sizes according to
> predicates.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)