Repository: hbase Updated Branches: refs/heads/branch-1 7e5b93109 -> e06bbe480
HBASE-16280 Use hash based map in SequenceIdAccounting Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e06bbe48 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e06bbe48 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e06bbe48 Branch: refs/heads/branch-1 Commit: e06bbe480405f2e46c837000b73d34f900cd987c Parents: 7e5b931 Author: zhangduo <[email protected]> Authored: Tue Jul 26 11:45:55 2016 +0800 Committer: zhangduo <[email protected]> Committed: Tue Jul 26 16:36:46 2016 +0800 ---------------------------------------------------------------------- .../hadoop/hbase/util/ImmutableByteArray.java | 54 +++++ .../regionserver/wal/SequenceIdAccounting.java | 224 +++++++++++-------- 2 files changed, 180 insertions(+), 98 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/e06bbe48/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ImmutableByteArray.java ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ImmutableByteArray.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ImmutableByteArray.java new file mode 100644 index 0000000..afd1ebf --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ImmutableByteArray.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * Mainly used as keys for HashMap. + */ [email protected] +public final class ImmutableByteArray { + + private final byte[] b; + + private ImmutableByteArray(byte[] b) { + this.b = b; + } + + @Override + public int hashCode() { + return Bytes.hashCode(b); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != ImmutableByteArray.class) { + return false; + } + return Bytes.equals(b, ((ImmutableByteArray) obj).b); + } + + public static ImmutableByteArray wrap(byte[] b) { + return new ImmutableByteArray(b); + } + + public String toStringUtf8() { + return Bytes.toString(b); + } +} http://git-wip-us.apache.org/repos/asf/hbase/blob/e06bbe48/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java index 6e10f3c..e78cbc2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceIdAccounting.java @@ -17,29 +17,39 @@ */ package org.apache.hadoop.hbase.regionserver.wal; +import com.google.common.annotations.VisibleForTesting; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ConcurrentSkipListMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes; - -import com.google.common.collect.Maps; +import org.apache.hadoop.hbase.util.ImmutableByteArray; /** * Accounting of sequence ids per region and then by column family. So we can our accounting - * current, call startCacheFlush and then finishedCacheFlush or abortCacheFlush so this instance - * can keep abreast of the state of sequence id persistence. Also call update per append. + * current, call startCacheFlush and then finishedCacheFlush or abortCacheFlush so this instance can + * keep abreast of the state of sequence id persistence. Also call update per append. + * <p> + * For the implementation, we assume that all the {@code encodedRegionName} passed in is gotten by + * {@link HRegionInfo#getEncodedNameAsBytes()}. So it is safe to use it as a hash key. And for + * family name, we use {@link ImmutableByteArray} as key. This is because hash based map is much + * faster than RBTree or CSLM and here we are on the critical write path. See HBASE-16278 for more + * details. */ [email protected] class SequenceIdAccounting { + private static final Log LOG = LogFactory.getLog(SequenceIdAccounting.class); /** * This lock ties all operations on {@link SequenceIdAccounting#flushingSequenceIds} and @@ -70,9 +80,8 @@ class SequenceIdAccounting { * <p>If flush fails, currently server is aborted so no need to restore previous sequence ids. * <p>Needs to be concurrent Maps because we use putIfAbsent updating oldest. */ - private final ConcurrentMap<byte[], ConcurrentMap<byte[], Long>> lowestUnflushedSequenceIds - = new ConcurrentSkipListMap<byte[], ConcurrentMap<byte[], Long>>( - Bytes.BYTES_COMPARATOR); + private final ConcurrentMap<byte[], ConcurrentMap<ImmutableByteArray, Long>> + lowestUnflushedSequenceIds = new ConcurrentHashMap<>(); /** * Map of encoded region names and family names to their lowest or OLDEST sequence/edit id @@ -80,8 +89,7 @@ class SequenceIdAccounting { * {@link #lowestUnflushedSequenceIds} while the lock {@link #tieLock} is held * (so movement between the Maps is atomic). */ - private final Map<byte[], Map<byte[], Long>> flushingSequenceIds = - new TreeMap<byte[], Map<byte[], Long>>(Bytes.BYTES_COMPARATOR); + private final Map<byte[], Map<ImmutableByteArray, Long>> flushingSequenceIds = new HashMap<>(); /** * Map of region encoded names to the latest/highest region sequence id. Updated on each @@ -91,7 +99,7 @@ class SequenceIdAccounting { * use {@link HRegionInfo#getEncodedNameAsBytes()} as keys. For a given region, it always returns * the same array. */ - private Map<byte[], Long> highestSequenceIds = new HashMap<byte[], Long>(); + private Map<byte[], Long> highestSequenceIds = new HashMap<>(); /** * Returns the lowest unflushed sequence id for the region. @@ -99,33 +107,39 @@ class SequenceIdAccounting { * @return Lowest outstanding unflushed sequenceid for <code>encodedRegionName</code>. Will * return {@link HConstants#NO_SEQNUM} when none. */ - long getLowestSequenceId(final byte [] encodedRegionName) { - synchronized (this.tieLock) { - Map<byte[], Long> m = this.flushingSequenceIds.get(encodedRegionName); - long flushingLowest = m != null? getLowestSequenceId(m): Long.MAX_VALUE; + long getLowestSequenceId(final byte[] encodedRegionName) { + synchronized (this.tieLock) { + Map<?, Long> m = this.flushingSequenceIds.get(encodedRegionName); + long flushingLowest = m != null ? getLowestSequenceId(m) : Long.MAX_VALUE; m = this.lowestUnflushedSequenceIds.get(encodedRegionName); - long unflushedLowest = m != null? getLowestSequenceId(m): HConstants.NO_SEQNUM; + long unflushedLowest = m != null ? getLowestSequenceId(m) : HConstants.NO_SEQNUM; return Math.min(flushingLowest, unflushedLowest); } } /** * @param encodedRegionName - * @param familyName + * @param familyName * @return Lowest outstanding unflushed sequenceid for <code>encodedRegionname</code> and - * <code>familyName</code>. Returned sequenceid may be for an edit currently being flushed. + * <code>familyName</code>. Returned sequenceid may be for an edit currently being + * flushed. */ - long getLowestSequenceId(final byte [] encodedRegionName, final byte [] familyName) { + long getLowestSequenceId(final byte[] encodedRegionName, final byte[] familyName) { + ImmutableByteArray familyNameWrapper = ImmutableByteArray.wrap(familyName); synchronized (this.tieLock) { - Map<byte[], Long> m = this.flushingSequenceIds.get(encodedRegionName); + Map<ImmutableByteArray, Long> m = this.flushingSequenceIds.get(encodedRegionName); if (m != null) { - Long lowest = m.get(familyName); - if (lowest != null) return lowest; + Long lowest = m.get(familyNameWrapper); + if (lowest != null) { + return lowest; + } } m = this.lowestUnflushedSequenceIds.get(encodedRegionName); if (m != null) { - Long lowest = m.get(familyName); - if (lowest != null) return lowest; + Long lowest = m.get(familyNameWrapper); + if (lowest != null) { + return lowest; + } } } return HConstants.NO_SEQNUM; @@ -156,29 +170,33 @@ class SequenceIdAccounting { Long l = Long.valueOf(sequenceid); this.highestSequenceIds.put(encodedRegionName, l); if (lowest) { - ConcurrentMap<byte[], Long> m = getOrCreateLowestSequenceIds(encodedRegionName); + ConcurrentMap<ImmutableByteArray, Long> m = getOrCreateLowestSequenceIds(encodedRegionName); for (byte[] familyName : families) { - m.putIfAbsent(familyName, l); + m.putIfAbsent(ImmutableByteArray.wrap(familyName), l); } } } - ConcurrentMap<byte[], Long> getOrCreateLowestSequenceIds(byte[] encodedRegionName) { + @VisibleForTesting + ConcurrentMap<ImmutableByteArray, Long> getOrCreateLowestSequenceIds(byte[] encodedRegionName) { // Intentionally, this access is done outside of this.regionSequenceIdLock. Done per append. - ConcurrentMap<byte[], Long> m = this.lowestUnflushedSequenceIds.get(encodedRegionName); - if (m != null) return m; - m = new ConcurrentSkipListMap<byte[], Long>(Bytes.BYTES_COMPARATOR); + ConcurrentMap<ImmutableByteArray, Long> m = this.lowestUnflushedSequenceIds + .get(encodedRegionName); + if (m != null) { + return m; + } + m = new ConcurrentHashMap<>(); // Another thread may have added it ahead of us. - ConcurrentMap<byte[], Long> alreadyPut = - this.lowestUnflushedSequenceIds.putIfAbsent(encodedRegionName, m); - return alreadyPut == null? m : alreadyPut; + ConcurrentMap<ImmutableByteArray, Long> alreadyPut = this.lowestUnflushedSequenceIds + .putIfAbsent(encodedRegionName, m); + return alreadyPut == null ? m : alreadyPut; } /** * @param sequenceids Map to search for lowest value. * @return Lowest value found in <code>sequenceids</code>. */ - static long getLowestSequenceId(Map<byte[], Long> sequenceids) { + private static long getLowestSequenceId(Map<?, Long> sequenceids) { long lowest = HConstants.NO_SEQNUM; for (Long sid: sequenceids.values()) { if (lowest == HConstants.NO_SEQNUM || sid.longValue() < lowest) { @@ -191,13 +209,14 @@ class SequenceIdAccounting { /** * @param src * @return New Map that has same keys as <code>src</code> but instead of a Map for a value, it - * instead has found the smallest sequence id and it returns that as the value instead. + * instead has found the smallest sequence id and it returns that as the value instead. */ - private <T extends Map<byte[], Long>> Map<byte[], Long> flattenToLowestSequenceId( - Map<byte[], T> src) { - if (src == null || src.isEmpty()) return null; - Map<byte[], Long> tgt = Maps.newHashMap(); - for (Map.Entry<byte[], T> entry: src.entrySet()) { + private <T extends Map<?, Long>> Map<byte[], Long> flattenToLowestSequenceId(Map<byte[], T> src) { + if (src == null || src.isEmpty()) { + return null; + } + Map<byte[], Long> tgt = new HashMap<>(); + for (Map.Entry<byte[], T> entry : src.entrySet()) { long lowestSeqId = getLowestSequenceId(entry.getValue()); if (lowestSeqId != HConstants.NO_SEQNUM) { tgt.put(entry.getKey(), lowestSeqId); @@ -216,20 +235,23 @@ class SequenceIdAccounting { * oldest/lowest outstanding edit. */ Long startCacheFlush(final byte[] encodedRegionName, final Set<byte[]> families) { - Map<byte[], Long> oldSequenceIds = null; + Map<ImmutableByteArray, Long> oldSequenceIds = null; Long lowestUnflushedInRegion = HConstants.NO_SEQNUM; synchronized (tieLock) { - Map<byte[], Long> m = this.lowestUnflushedSequenceIds.get(encodedRegionName); + Map<ImmutableByteArray, Long> m = this.lowestUnflushedSequenceIds.get(encodedRegionName); if (m != null) { // NOTE: Removal from this.lowestUnflushedSequenceIds must be done in controlled // circumstance because another concurrent thread now may add sequenceids for this family // (see above in getOrCreateLowestSequenceId). Make sure you are ok with this. Usually it // is fine because updates are blocked when this method is called. Make sure!!! - for (byte[] familyName: families) { - Long seqId = m.remove(familyName); + for (byte[] familyName : families) { + ImmutableByteArray familyNameWrapper = ImmutableByteArray.wrap(familyName); + Long seqId = m.remove(familyNameWrapper); if (seqId != null) { - if (oldSequenceIds == null) oldSequenceIds = Maps.newTreeMap(Bytes.BYTES_COMPARATOR); - oldSequenceIds.put(familyName, seqId); + if (oldSequenceIds == null) { + oldSequenceIds = new HashMap<>(); + } + oldSequenceIds.put(familyNameWrapper, seqId); } } if (oldSequenceIds != null && !oldSequenceIds.isEmpty()) { @@ -262,7 +284,7 @@ class SequenceIdAccounting { return lowestUnflushedInRegion; } - void completeCacheFlush(final byte [] encodedRegionName) { + void completeCacheFlush(final byte[] encodedRegionName) { synchronized (tieLock) { this.flushingSequenceIds.remove(encodedRegionName); } @@ -271,16 +293,16 @@ class SequenceIdAccounting { void abortCacheFlush(final byte[] encodedRegionName) { // Method is called when we are crashing down because failed write flush AND it is called // if we fail prepare. The below is for the fail prepare case; we restore the old sequence ids. - Map<byte[], Long> flushing = null; - Map<byte[], Long> tmpMap = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR); + Map<ImmutableByteArray, Long> flushing = null; + Map<ImmutableByteArray, Long> tmpMap = new HashMap<>(); // Here we are moving sequenceids from flushing back to unflushed; doing opposite of what // happened in startCacheFlush. During prepare phase, we have update lock on the region so // no edits should be coming in via append. synchronized (tieLock) { flushing = this.flushingSequenceIds.remove(encodedRegionName); if (flushing != null) { - Map<byte[], Long> unflushed = getOrCreateLowestSequenceIds(encodedRegionName); - for (Map.Entry<byte[], Long> e: flushing.entrySet()) { + Map<ImmutableByteArray, Long> unflushed = getOrCreateLowestSequenceIds(encodedRegionName); + for (Map.Entry<ImmutableByteArray, Long> e: flushing.entrySet()) { // Set into unflushed the 'old' oldest sequenceid and if any value in flushed with this // value, it will now be in tmpMap. tmpMap.put(e.getKey(), unflushed.put(e.getKey(), e.getValue())); @@ -291,12 +313,12 @@ class SequenceIdAccounting { // Here we are doing some 'test' to see if edits are going in out of order. What is it for? // Carried over from old code. if (flushing != null) { - for (Map.Entry<byte[], Long> e : flushing.entrySet()) { + for (Map.Entry<ImmutableByteArray, Long> e : flushing.entrySet()) { Long currentId = tmpMap.get(e.getKey()); if (currentId != null && currentId.longValue() <= e.getValue().longValue()) { - String errorStr = Bytes.toString(encodedRegionName) + " family " + - Bytes.toString(e.getKey()) + " acquired edits out of order current memstore seq=" + - currentId + ", previous oldest unflushed id=" + e.getValue(); + String errorStr = Bytes.toString(encodedRegionName) + " family " + + e.getKey().toStringUtf8() + " acquired edits out of order current memstore seq=" + + currentId + ", previous oldest unflushed id=" + e.getValue(); LOG.error(errorStr); Runtime.getRuntime().halt(1); } @@ -307,57 +329,63 @@ class SequenceIdAccounting { /** * See if passed <code>sequenceids</code> are lower -- i.e. earlier -- than any outstanding * sequenceids, sequenceids we are holding on to in this accounting instance. - * @param sequenceids Keyed by encoded region name. Cannot be null (doesn't make - * sense for it to be null). + * @param sequenceids Keyed by encoded region name. Cannot be null (doesn't make sense for it to + * be null). * @return true if all sequenceids are lower, older than, the old sequenceids in this instance. */ - boolean areAllLower(Map<byte[], Long> sequenceids) { - Map<byte[], Long> flushing = null; - Map<byte[], Long> unflushed = null; - synchronized (this.tieLock) { - // Get a flattened -- only the oldest sequenceid -- copy of current flushing and unflushed - // data structures to use in tests below. - flushing = flattenToLowestSequenceId(this.flushingSequenceIds); - unflushed = flattenToLowestSequenceId(this.lowestUnflushedSequenceIds); - } + boolean areAllLower(Map<byte[], Long> sequenceids) { + Map<byte[], Long> flushing = null; + Map<byte[], Long> unflushed = null; + synchronized (this.tieLock) { + // Get a flattened -- only the oldest sequenceid -- copy of current flushing and unflushed + // data structures to use in tests below. + flushing = flattenToLowestSequenceId(this.flushingSequenceIds); + unflushed = flattenToLowestSequenceId(this.lowestUnflushedSequenceIds); + } for (Map.Entry<byte[], Long> e : sequenceids.entrySet()) { long oldestFlushing = Long.MAX_VALUE; long oldestUnflushed = Long.MAX_VALUE; - if (flushing != null) { - if (flushing.containsKey(e.getKey())) oldestFlushing = flushing.get(e.getKey()); + if (flushing != null && flushing.containsKey(e.getKey())) { + oldestFlushing = flushing.get(e.getKey()); } - if (unflushed != null) { - if (unflushed.containsKey(e.getKey())) oldestUnflushed = unflushed.get(e.getKey()); + if (unflushed != null && unflushed.containsKey(e.getKey())) { + oldestUnflushed = unflushed.get(e.getKey()); } long min = Math.min(oldestFlushing, oldestUnflushed); - if (min <= e.getValue()) return false; + if (min <= e.getValue()) { + return false; + } } return true; } - /** - * Iterates over the given Map and compares sequence ids with corresponding - * entries in {@link #oldestUnflushedRegionSequenceIds}. If a region in - * {@link #oldestUnflushedRegionSequenceIds} has a sequence id less than that passed - * in <code>sequenceids</code> then return it. - * @param sequenceids Sequenceids keyed by encoded region name. - * @return regions found in this instance with sequence ids less than those passed in. - */ - byte[][] findLower(Map<byte[], Long> sequenceids) { - List<byte[]> toFlush = null; - // Keeping the old behavior of iterating unflushedSeqNums under oldestSeqNumsLock. - synchronized (tieLock) { - for (Map.Entry<byte[], Long> e: sequenceids.entrySet()) { - Map<byte[], Long> m = this.lowestUnflushedSequenceIds.get(e.getKey()); - if (m == null) continue; - // The lowest sequence id outstanding for this region. - long lowest = getLowestSequenceId(m); - if (lowest != HConstants.NO_SEQNUM && lowest <= e.getValue()) { - if (toFlush == null) toFlush = new ArrayList<byte[]>(); - toFlush.add(e.getKey()); - } - } - } - return toFlush == null? null: toFlush.toArray(new byte[][] { HConstants.EMPTY_BYTE_ARRAY }); - } -} \ No newline at end of file + /** + * Iterates over the given Map and compares sequence ids with corresponding entries in + * {@link #oldestUnflushedRegionSequenceIds}. If a region in + * {@link #oldestUnflushedRegionSequenceIds} has a sequence id less than that passed in + * <code>sequenceids</code> then return it. + * @param sequenceids Sequenceids keyed by encoded region name. + * @return regions found in this instance with sequence ids less than those passed in. + */ + byte[][] findLower(Map<byte[], Long> sequenceids) { + List<byte[]> toFlush = null; + // Keeping the old behavior of iterating unflushedSeqNums under oldestSeqNumsLock. + synchronized (tieLock) { + for (Map.Entry<byte[], Long> e : sequenceids.entrySet()) { + Map<ImmutableByteArray, Long> m = this.lowestUnflushedSequenceIds.get(e.getKey()); + if (m == null) { + continue; + } + // The lowest sequence id outstanding for this region. + long lowest = getLowestSequenceId(m); + if (lowest != HConstants.NO_SEQNUM && lowest <= e.getValue()) { + if (toFlush == null) { + toFlush = new ArrayList<byte[]>(); + } + toFlush.add(e.getKey()); + } + } + } + return toFlush == null ? null : toFlush.toArray(new byte[0][]); + } +}
