[
https://issues.apache.org/jira/browse/TAJO-1940?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15111760#comment-15111760
]
ASF GitHub Bot commented on TAJO-1940:
--------------------------------------
Github user jihoonson commented on a diff in the pull request:
https://github.com/apache/tajo/pull/910#discussion_r50492684
--- Diff:
tajo-storage/tajo-storage-hbase/src/main/java/org/apache/tajo/storage/hbase/RegionSizeCalculator.java
---
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.storage.hbase;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ServerLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Computes size of each region for given table and given column families.
+ * The value is used by MapReduce for better scheduling.
+ * */
+@InterfaceStability.Evolving
+@InterfaceAudience.Private
+public class RegionSizeCalculator {
+
+ private static final Log LOG =
LogFactory.getLog(RegionSizeCalculator.class);
+
+ /**
+ * Maps each region to its size in bytes.
+ * */
+ private final Map<byte[], Long> sizeMap = new TreeMap<byte[],
Long>(Bytes.BYTES_COMPARATOR);
+
+ static final String ENABLE_REGIONSIZECALCULATOR =
"hbase.regionsizecalculator.enable";
+
+ /**
+ * Computes size of each region for table and given column families.
+ *
+ * @deprecated Use {@link #RegionSizeCalculator(RegionLocator, Admin)}
instead.
+ */
+ @Deprecated
+ public RegionSizeCalculator(HTable table) throws IOException {
+ HBaseAdmin admin = new HBaseAdmin(table.getConfiguration());
+ try {
+ init(table.getRegionLocator(), admin);
+ } finally {
+ admin.close();
+ }
+ }
+
+ /**
+ * Computes size of each region for table and given column families.
+ * */
+ public RegionSizeCalculator(RegionLocator regionLocator, Admin admin)
throws IOException {
+ init(regionLocator, admin);
+ }
+
+ private void init(RegionLocator regionLocator, Admin admin)
+ throws IOException {
+ if (!enabled(admin.getConfiguration())) {
+ LOG.info("Region size calculation disabled.");
+ return;
+ }
+
+ LOG.info("Calculating region sizes for table \"" +
regionLocator.getName() + "\".");
+
+ //get regions for table
+ List<HRegionLocation> tableRegionInfos =
regionLocator.getAllRegionLocations();
+ Set<byte[]> tableRegions = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+ for (HRegionLocation regionInfo : tableRegionInfos) {
+ tableRegions.add(regionInfo.getRegionInfo().getRegionName());
+ }
+
+ ClusterStatus clusterStatus = admin.getClusterStatus();
+ Collection<ServerName> servers = clusterStatus.getServers();
+ final long megaByte = 1024L * 1024L;
+
+ //iterate all cluster regions, filter regions from our table and
compute their size
+ for (ServerName serverName: servers) {
+ ServerLoad serverLoad = clusterStatus.getLoad(serverName);
+
+ for (RegionLoad regionLoad: serverLoad.getRegionsLoad().values()) {
+ byte[] regionId = regionLoad.getName();
+
+ if (tableRegions.contains(regionId)) {
+
+ long regionSizeBytes = (regionLoad.getStorefileSizeMB() +
regionLoad.getMemStoreSizeMB()) * megaByte;
--- End diff --
This line appears to have been changed so that the memstore size is also
included in the region size calculation. Would you add a code comment
explaining this?
> Implement HBaseTablespace::getTableVolume() method
> --------------------------------------------------
>
> Key: TAJO-1940
> URL: https://issues.apache.org/jira/browse/TAJO-1940
> Project: Tajo
> Issue Type: Task
> Components: HBase Storage
> Reporter: Jihoon Son
> Assignee: Hyunsik Choi
> Fix For: 0.12.0, 0.11.1
>
>
> Table volume has an important role in query planning. Tajo's query optimizer
> makes many decisions based on table volume, because it is currently the only
> available statistic.
> However, HBaseTablespace doesn't support the getTableVolume() method, so our
> plans are poor when a query involves any HBase tables.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)