[ 
https://issues.apache.org/jira/browse/TAJO-1940?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15111901#comment-15111901
 ] 

ASF GitHub Bot commented on TAJO-1940:
--------------------------------------

Github user hyunsik commented on a diff in the pull request:

    https://github.com/apache/tajo/pull/910#discussion_r50501497
  
    --- Diff: 
tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/RegionSizeCalculator.java
 ---
    @@ -0,0 +1,147 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.tajo.storage.hbase;
    +
    +import java.io.IOException;
    +import java.util.Arrays;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Set;
    +import java.util.TreeMap;
    +import java.util.TreeSet;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.hbase.ClusterStatus;
    +import org.apache.hadoop.hbase.HRegionLocation;
    +import org.apache.hadoop.hbase.RegionLoad;
    +import org.apache.hadoop.hbase.ServerLoad;
    +import org.apache.hadoop.hbase.ServerName;
    +import org.apache.hadoop.hbase.classification.InterfaceAudience;
    +import org.apache.hadoop.hbase.classification.InterfaceStability;
    +import org.apache.hadoop.hbase.client.Admin;
    +import org.apache.hadoop.hbase.client.HBaseAdmin;
    +import org.apache.hadoop.hbase.client.HTable;
    +import org.apache.hadoop.hbase.client.RegionLocator;
    +import org.apache.hadoop.hbase.util.Bytes;
    +
    +/**
    + * Computes size of each region for given table and given column families.
    + * The value is used by MapReduce for better scheduling.
    + * */
    +@InterfaceStability.Evolving
    +@InterfaceAudience.Private
    +public class RegionSizeCalculator {
    +
    +  private static final Log LOG = 
LogFactory.getLog(RegionSizeCalculator.class);
    +
    +  /**
    +   * Maps each region to its size in bytes.
    +   * */
    +  private final Map<byte[], Long> sizeMap = new TreeMap<byte[], 
Long>(Bytes.BYTES_COMPARATOR);
    +
    +  static final String ENABLE_REGIONSIZECALCULATOR = 
"hbase.regionsizecalculator.enable";
    +
    +  /**
    +   * Computes size of each region for table and given column families.
    +   *
    +   * @deprecated Use {@link #RegionSizeCalculator(RegionLocator, Admin)} 
instead.
    +   */
    +  @Deprecated
    +  public RegionSizeCalculator(HTable table) throws IOException {
    +    HBaseAdmin admin = new HBaseAdmin(table.getConfiguration());
    +    try {
    +      init(table.getRegionLocator(), admin);
    +    } finally {
    +      admin.close();
    +    }
    +  }
    +
    +  /**
    +   * Computes size of each region for table and given column families.
    +   * */
    +  public RegionSizeCalculator(RegionLocator regionLocator, Admin admin) 
throws IOException {
    +    init(regionLocator, admin);
    +  }
    +
    +  private void init(RegionLocator regionLocator, Admin admin)
    +      throws IOException {
    +    if (!enabled(admin.getConfiguration())) {
    +      LOG.info("Region size calculation disabled.");
    +      return;
    +    }
    +
    +    LOG.info("Calculating region sizes for table \"" + 
regionLocator.getName() + "\".");
    +
    +    //get regions for table
    +    List<HRegionLocation> tableRegionInfos = 
regionLocator.getAllRegionLocations();
    +    Set<byte[]> tableRegions = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
    +    for (HRegionLocation regionInfo : tableRegionInfos) {
    +      tableRegions.add(regionInfo.getRegionInfo().getRegionName());
    +    }
    +
    +    ClusterStatus clusterStatus = admin.getClusterStatus();
    +    Collection<ServerName> servers = clusterStatus.getServers();
    +    final long megaByte = 1024L * 1024L;
    +
    +    //iterate all cluster regions, filter regions from our table and 
compute their size
    +    for (ServerName serverName: servers) {
    +      ServerLoad serverLoad = clusterStatus.getLoad(serverName);
    +
    +      for (RegionLoad regionLoad: serverLoad.getRegionsLoad().values()) {
    +        byte[] regionId = regionLoad.getName();
    +
    +        if (tableRegions.contains(regionId)) {
    +
    +          long regionSizeBytes = (regionLoad.getStorefileSizeMB() + 
regionLoad.getMemStoreSizeMB()) * megaByte;
    --- End diff --
    
    The original RegionSizeCalculator does not take the size of the memory 
store into account. So, I forked it and modified the class to count the memory 
store size as well as the store file size.


> Implement HBaseTablespace::getTableVolume() method
> --------------------------------------------------
>
>                 Key: TAJO-1940
>                 URL: https://issues.apache.org/jira/browse/TAJO-1940
>             Project: Tajo
>          Issue Type: Task
>          Components: HBase Storage
>            Reporter: Jihoon Son
>            Assignee: Hyunsik Choi
>             Fix For: 0.12.0, 0.11.1
>
>
> Table volume plays an important role in query planning. Tajo's query optimizer 
> makes many decisions based on table volume, because it is currently the only 
> available statistic. 
> However, HBaseTablespace doesn't support the getTableVolume() method, so our 
> plans are poor when the query involves any HBase tables. 



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to