Author: ramkrishna
Date: Thu Jan 12 17:45:05 2012
New Revision: 1230662

URL: http://svn.apache.org/viewvc?rev=1230662&view=rev
Log:
HBASE-5159 src/main/java/org/apache/hadoop/hbase/client/HTableUtil.java (Ram)
Added:
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/client/HTableUtil.java
Modified:
    hbase/branches/0.90/CHANGES.txt

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1230662&r1=1230661&r2=1230662&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Thu Jan 12 17:45:05 2012
@@ -160,6 +160,7 @@ Release 0.90.5 - Dec 22, 2011
    HBASE-5158 Backport HBASE-4878 - Master crash when splitting hlog may cause data loss (Ram)
    HBASE-5168 Backport HBASE-5100 - Rollback of split could cause closed region to be opened again(Ram)
    HBASE-5178 Backport HBASE-4101 - Regionserver Deadlock (Ram)
+   HBASE-5159 Backport HBASE-4079 - HTableUtil - helper class for loading data (Ram)
 
 NEW FEATURE
    HBASE-4377 [hbck] Offline rebuild .META. from fs data only

Added: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/client/HTableUtil.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/client/HTableUtil.java?rev=1230662&view=auto
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/client/HTableUtil.java (added)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/client/HTableUtil.java Thu Jan 12 17:45:05 2012
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import java.io.IOException;
+import java.lang.InterruptedException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Row;
+
+/**
+ * Utility class for HTable.
+ *
+ *
+ */
+public class HTableUtil {
+
+  private static final int INITIAL_LIST_SIZE = 250;
+
+  /**
+   * Processes a List of Puts and writes them to an HTable instance in
+   * RegionServer buckets via the htable.put method. This will utilize the
+   * writeBuffer, thus the writeBuffer flush frequency may be tuned accordingly
+   * via htable.setWriteBufferSize. <br>
+   * <br>
+   * The benefit of submitting Puts in this manner is to minimize the number of
+   * RegionServer RPCs in each flush. <br>
+   * <br>
+   * Assumption #1: Regions have been pre-created for the table. If they
+   * haven't, then all of the Puts will go to the same region, defeating the
+   * purpose of this utility method. See the Apache HBase book for an
+   * explanation of how to do this. <br>
+   * Assumption #2: Row-keys are not monotonically increasing. See the Apache
+   * HBase book for an explanation of this problem. <br>
+   * Assumption #3: That the input list of Puts is big enough to be useful (in
+   * the thousands or more). The intent of this method is to process larger
+   * chunks of data. <br>
+   * Assumption #4: htable.setAutoFlush(false) has been set. This is a
+   * requirement to use the writeBuffer. <br>
+   * <br>
+   *
+   * @param htable
+   *          HTable instance for target HBase table
+   * @param puts
+   *          List of Put instances
+   * @throws IOException
+   *           if a remote or network exception occurs
+   *
+   */
+  public static void bucketRsPut(HTable htable, List<Put> puts)
+      throws IOException {
+
+    Map<String, List<Put>> putMap = createRsPutMap(htable, puts);
+    for (List<Put> rsPuts : putMap.values()) {
+      htable.put(rsPuts);
+    }
+    htable.flushCommits();
+  }
+
+  /**
+   * Processes a List of Rows (Put, Delete) and writes them to an HTable
+   * instance in RegionServer buckets via the htable.batch method. <br>
+   * <br>
+   * The benefit of submitting Puts in this manner is to minimize the number of
+   * RegionServer RPCs, thus this will produce one RPC of Puts per RegionServer. <br>
+   * <br>
+   * Assumption #1: Regions have been pre-created for the table. If they
+   * haven't, then all of the Puts will go to the same region, defeating the
+   * purpose of this utility method. See the Apache HBase book for an
+   * explanation of how to do this. <br>
+   * Assumption #2: Row-keys are not monotonically increasing. See the Apache
+   * HBase book for an explanation of this problem. <br>
+   * Assumption #3: That the input list of Rows is big enough to be useful (in
+   * the thousands or more). The intent of this method is to process larger
+   * chunks of data. <br>
+   * <br>
+   * This method accepts a list of Row objects because the underlying .batch
+   * method accepts a list of Row objects. <br>
+   * <br>
+   *
+   * @param htable
+   *          HTable instance for target HBase table
+   * @param rows
+   *          List of Row instances
+   * @throws IOException
+   *           if a remote or network exception occurs
+   */
+  public static void bucketRsBatch(HTable htable, List<Row> rows)
+      throws IOException {
+
+    try {
+      Map<String, List<Row>> rowMap = createRsRowMap(htable, rows);
+      for (List<Row> rsRows : rowMap.values()) {
+        htable.batch(rsRows);
+      }
+    } catch (InterruptedException e) {
+      throw new IOException(e);
+    }
+
+  }
+
+  private static Map<String, List<Put>> createRsPutMap(HTable htable,
+      List<Put> puts) throws IOException {
+
+    Map<String, List<Put>> putMap = new HashMap<String, List<Put>>();
+    for (Put put : puts) {
+      HRegionLocation rl = htable.getRegionLocation(put.getRow());
+      String hostname = rl.getServerAddress().getHostname();
+      List<Put> recs = putMap.get(hostname);
+      if (recs == null) {
+        recs = new ArrayList<Put>(INITIAL_LIST_SIZE);
+        putMap.put(hostname, recs);
+      }
+      recs.add(put);
+    }
+    return putMap;
+  }
+
+  private static Map<String, List<Row>> createRsRowMap(HTable htable,
+      List<Row> rows) throws IOException {
+
+    Map<String, List<Row>> rowMap = new HashMap<String, List<Row>>();
+    for (Row row : rows) {
+      HRegionLocation rl = htable.getRegionLocation(row.getRow());
+      String hostname = rl.getServerAddress().getHostname();
+      List<Row> recs = rowMap.get(hostname);
+      if (recs == null) {
+        recs = new ArrayList<Row>(INITIAL_LIST_SIZE);
+        rowMap.put(hostname, recs);
+      }
+      recs.add(row);
+    }
+    return rowMap;
+  }
+
+}
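
For reference, a minimal usage sketch of the new bucketRsPut helper (not part of this commit; the table name "usertable" and column family "cf" are hypothetical, and the table is assumed to be pre-split across several region servers per Assumption #1 in the javadoc above):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.UUID;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.HTableUtil;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.util.Bytes;

    public class BucketRsPutExample {
      public static void main(String[] args) throws IOException {
        // Hypothetical pre-split table and column family.
        HTable htable = new HTable(HBaseConfiguration.create(), "usertable");
        htable.setAutoFlush(false);                  // Assumption #4: required to use the writeBuffer
        htable.setWriteBufferSize(12 * 1024 * 1024); // controls how often the buffer flushes

        List<Put> puts = new ArrayList<Put>(10000);
        for (int i = 0; i < 10000; i++) {
          // Random row keys keep the load spread out (Assumption #2: no monotonically increasing keys).
          Put put = new Put(Bytes.toBytes(UUID.randomUUID().toString()));
          put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(i));
          puts.add(put);
        }

        // Groups the Puts by hosting region server, issues one buffered
        // htable.put(List) per server, then a single flushCommits().
        HTableUtil.bucketRsPut(htable, puts);
        htable.close();
      }
    }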
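
Similarly, a sketch of bucketRsBatch with a mixed list of Puts and Deletes (again with hypothetical names); the helper groups the Rows by hosting region server and issues one htable.batch(List) call per server:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.UUID;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Delete;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.HTableUtil;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Row;
    import org.apache.hadoop.hbase.util.Bytes;

    public class BucketRsBatchExample {
      public static void main(String[] args) throws IOException {
        HTable htable = new HTable(HBaseConfiguration.create(), "usertable"); // hypothetical table

        List<Row> rows = new ArrayList<Row>(5000);
        for (int i = 0; i < 5000; i++) {
          byte[] rowKey = Bytes.toBytes(UUID.randomUUID().toString());
          if (i % 10 == 0) {
            rows.add(new Delete(rowKey));            // Puts and Deletes can share one batch
          } else {
            Put put = new Put(rowKey);
            put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(i));
            rows.add(put);
          }
        }

        // One htable.batch(List) call per region server hosting the affected rows.
        HTableUtil.bucketRsBatch(htable, rows);
        htable.close();
      }
    }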