Author: larsh
Date: Thu Feb 2 23:00:06 2012
New Revision: 1239909
URL: http://svn.apache.org/viewvc?rev=1239909&view=rev
Log:
HBASE-5304 Pluggable split key policy
Added:
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java
Modified:
hbase/trunk/src/docbkx/book.xml
hbase/trunk/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
Modified: hbase/trunk/src/docbkx/book.xml
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/docbkx/book.xml?rev=1239909&r1=1239908&r2=1239909&view=diff
==============================================================================
--- hbase/trunk/src/docbkx/book.xml (original)
+++ hbase/trunk/src/docbkx/book.xml Thu Feb 2 23:00:06 2012
@@ -2002,6 +2002,18 @@ rs.close();
the parent's hosting RegionServer and then reports the split to the
Master. See <xref linkend="disable.splitting" /> for how to manually
manage
splits (and for why you might do this)</para>
+ <section>
+ <title>Custom Split Policies</title>
+ <para>The default split policy can be overridden using a custom
<link
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.html">RegionSplitPolicy</link>
(HBase 0.94+).
+ Typically a custom split policy should extend HBase's default split
policy: <link
xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.html">ConstantSizeRegionSplitPolicy</link>.
+ </para>
+ <para>The policy can be set globally through the HBaseConfiguration
used, or on a per-table basis:
+<programlisting>
+HTableDescriptor myHtd = ...;
+myHtd.setValue(HTableDescriptor.SPLIT_POLICY,
MyCustomSplitPolicy.class.getName());
+</programlisting>
+ </para>
+ </section>
</section>
<section xml:id="store">
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java?rev=1239909&r1=1239908&r2=1239909&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java Thu
Feb 2 23:00:06 2012
@@ -69,7 +69,7 @@ public class HTableDescriptor implements
private static final String FAMILIES = "FAMILIES";
- private static final String SPLIT_POLICY = "SPLIT_POLICY";
+ public static final String SPLIT_POLICY = "SPLIT_POLICY";
/**
* <em>INTERNAL</em> Used by HBase Shell interface to access this metadata
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java?rev=1239909&r1=1239908&r2=1239909&view=diff
==============================================================================
---
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
(original)
+++
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
Thu Feb 2 23:00:06 2012
@@ -23,12 +23,13 @@ import org.apache.hadoop.hbase.HConstant
* A {@link RegionSplitPolicy} implementation which splits a region
* as soon as any of its store files exceeds a maximum configurable
* size.
+ * <p>This is the default split policy.</p>
*/
-class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
+public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
private long desiredMaxFileSize;
@Override
- void configureForRegion(HRegion region) {
+ protected void configureForRegion(HRegion region) {
super.configureForRegion(region);
long maxFileSize = region.getTableDesc().getMaxFileSize();
@@ -41,7 +42,7 @@ class ConstantSizeRegionSplitPolicy exte
}
@Override
- boolean shouldSplit() {
+ protected boolean shouldSplit() {
boolean force = region.shouldForceSplit();
boolean foundABigStore = false;
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1239909&r1=1239908&r2=1239909&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
(original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
Thu Feb 2 23:00:06 2012
@@ -4863,10 +4863,6 @@ public class HRegion implements HeapSize
return null;
}
- if (this.explicitSplitPoint != null) {
- return this.explicitSplitPoint;
- }
-
if (!splitPolicy.shouldSplit()) {
return null;
}
Modified:
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java?rev=1239909&r1=1239908&r2=1239909&view=diff
==============================================================================
---
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
(original)
+++
hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
Thu Feb 2 23:00:06 2012
@@ -32,7 +32,7 @@ import com.google.common.base.Preconditi
* A split policy determines when a region should be split.
* {@see ConstantSizeRegionSplitPolicy}
*/
-abstract class RegionSplitPolicy extends Configured {
+public abstract class RegionSplitPolicy extends Configured {
private static final Class<ConstantSizeRegionSplitPolicy>
DEFAULT_SPLIT_POLICY_CLASS = ConstantSizeRegionSplitPolicy.class;
@@ -45,7 +45,7 @@ abstract class RegionSplitPolicy extends
* Upon construction, this method will be called with the region
* to be governed. It will be called once and only once.
*/
- void configureForRegion(HRegion region) {
+ protected void configureForRegion(HRegion region) {
Preconditions.checkState(
this.region == null,
"Policy already configured for region {}",
@@ -57,14 +57,18 @@ abstract class RegionSplitPolicy extends
/**
* @return true if the specified region should be split.
*/
- abstract boolean shouldSplit();
+ protected abstract boolean shouldSplit();
/**
* @return the key at which the region should be split, or null
* if it cannot be split. This will only be called if shouldSplit
* previously returned true.
*/
- byte[] getSplitPoint() {
+ protected byte[] getSplitPoint() {
+ byte[] explicitSplitPoint = this.region.getExplicitSplitPoint();
+ if (explicitSplitPoint != null) {
+ return explicitSplitPoint;
+ }
Map<byte[], Store> stores = region.getStores();
byte[] splitPointFromLargestStore = null;
Added:
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java?rev=1239909&view=auto
==============================================================================
---
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java
(added)
+++
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java
Thu Feb 2 23:00:06 2012
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Arrays;
+
+/**
+ * A custom RegionSplitPolicy for testing.
+ * This class also demonstrates how to implement a SplitPolicy that groups
+ * rows by a prefix of the row-key
+ *
+ * This ensures that a region is not split "inside"
+ * a prefix of a row key. I.e. rows can be co-located by
+ * their prefix.
+ */
+public class PrefixSplitKeyPolicy extends ConstantSizeRegionSplitPolicy {
+ public static String PREFIX_LENGTH_KEY =
"prefix_split_key_policy.prefix_length";
+
+ private int prefix_length;
+
+ @Override
+ protected void configureForRegion(HRegion region) {
+ super.configureForRegion(region);
+
+ if (region != null) {
+ // this demonstrates how a RegionSplitPolicy can be configured
+ // through HTableDescriptor values
+ prefix_length = Integer.parseInt(region.getTableDesc().getValue(
+ PREFIX_LENGTH_KEY));
+ }
+ }
+
+ @Override
+ protected byte[] getSplitPoint() {
+ byte[] splitPoint = super.getSplitPoint();
+ if (splitPoint != null && splitPoint.length > 0) {
+ // group split keys by a prefix
+ return Arrays.copyOf(splitPoint,
+ Math.min(prefix_length, splitPoint.length));
+ } else {
+ return splitPoint;
+ }
+ }
+}
Modified:
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
URL:
http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java?rev=1239909&r1=1239908&r2=1239909&view=diff
==============================================================================
---
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
(original)
+++
hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
Thu Feb 2 23:00:06 2012
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.regionse
import static org.junit.Assert.*;
import java.io.IOException;
+import java.util.Arrays;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
@@ -71,6 +72,41 @@ public class TestRegionSplitPolicy {
assertEquals(9999L, policy.getDesiredMaxFileSize());
}
+ /**
+ * Test setting up a customized split policy.
+ *
+ * <p>The table descriptor names {@code PrefixSplitKeyPolicy} as its split
+ * policy and sets a prefix length of 2. The mocked region exposes one
+ * mocked store whose split point is "abcd", so the policy obtained from
+ * {@code RegionSplitPolicy.create} is expected to return "ab".
+ *
+ * <p>The region mock is then changed to report a forced split with the
+ * explicit split point "efgh"; a newly created policy is expected to
+ * return "ef", i.e. the explicit split point is truncated to the prefix
+ * length as well.
+ *
+ * <p>NOTE(review): {@code stores} and {@code conf} are declared outside
+ * this method and are not visible here; presumably they are fixture
+ * fields of the test class -- confirm that {@code stores} holds no other
+ * entries when this test runs.
+ *
+ * @throws IOException declared by this test; presumably raised by
+ *         {@code RegionSplitPolicy.create} -- TODO confirm
+ */
+ @Test
+ public void testCustomPolicy() throws IOException {
+ HTableDescriptor myHtd = new HTableDescriptor();
+ myHtd.setValue(HTableDescriptor.SPLIT_POLICY,
+ PrefixSplitKeyPolicy.class.getName());
+ myHtd.setValue(PrefixSplitKeyPolicy.PREFIX_LENGTH_KEY, String.valueOf(2));
+
+ HRegion myMockRegion = Mockito.mock(HRegion.class);
+ Mockito.doReturn(myHtd).when(myMockRegion).getTableDesc();
+ Mockito.doReturn(stores).when(myMockRegion).getStores();
+
+ Store mockStore = Mockito.mock(Store.class);
+ Mockito.doReturn(2000L).when(mockStore).getSize();
+ Mockito.doReturn(true).when(mockStore).canSplit();
+ Mockito.doReturn(Bytes.toBytes("abcd")).when(mockStore).getSplitPoint();
+ stores.put(new byte[] { 1 }, mockStore);
+
+ PrefixSplitKeyPolicy policy = (PrefixSplitKeyPolicy) RegionSplitPolicy
+ .create(myMockRegion, conf);
+
+ assertEquals("ab", Bytes.toString(policy.getSplitPoint()));
+
+ Mockito.doReturn(true).when(myMockRegion).shouldForceSplit();
+ Mockito.doReturn(Bytes.toBytes("efgh")).when(myMockRegion)
+ .getExplicitSplitPoint();
+
+ policy = (PrefixSplitKeyPolicy) RegionSplitPolicy
+ .create(myMockRegion, conf);
+
+ assertEquals("ef", Bytes.toString(policy.getSplitPoint()));
+ }
+ }
+
@Test
public void testConstantSizePolicy() throws IOException {
htd.setMaxFileSize(1024L);