This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new a2bd6fb1db4 HBASE-29795 RegionServer abort because of NPE when closing
compacted store file (#7576)
a2bd6fb1db4 is described below
commit a2bd6fb1db4ba8b58fff57fe46acabe32689baa4
Author: Duo Zhang <[email protected]>
AuthorDate: Sun Dec 28 22:22:55 2025 +0800
HBASE-29795 RegionServer abort because of NPE when closing compacted store
file (#7576)
Signed-off-by: Wellington Ramos Chevreuil <[email protected]>
(cherry picked from commit 1a3e371ca0a9c5cc3a884cb6121a4c8769f25a70)
---
.../apache/hadoop/hbase/util/FastStringPool.java | 158 ++++++++++
.../hadoop/hbase/util/TestFastStringPool.java | 113 +++++++
.../hadoop/hbase/io/hfile/BlockCacheKey.java | 34 ++-
.../hadoop/hbase/io/hfile/bucket/BucketCache.java | 15 -
.../hbase/io/hfile/bucket/FilePathStringPool.java | 176 -----------
.../hbase/io/hfile/bucket/TestBucketCache.java | 32 --
.../io/hfile/bucket/TestFilePathStringPool.java | 326 ---------------------
7 files changed, 290 insertions(+), 564 deletions(-)
diff --git
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
new file mode 100644
index 00000000000..a5f0dc17bcc
--- /dev/null
+++
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+
+/**
+ * A string pool like {@link String#intern()}, but more flexible as we can
create multiple instances
+ * and use them in difference places, where {@link String#intern()} is global.
+ * <p>
+ * We use {@link WeakReference} so when there are no actual reference to the
String, it will be GCed
+ * to reduce memory pressure.
+ * <p>
+ * The difference between {@link WeakObjectPool} is that, we also need to use
{@link WeakReference}
+ * as key, not only value, because the key(a String) is exactly what we want
to deduplicate.
+ */
[email protected]
+public class FastStringPool {
+
+ private static final class WeakKey extends WeakReference<String> {
+
+ private final int hash;
+
+ WeakKey(String referent, ReferenceQueue<String> queue) {
+ super(Preconditions.checkNotNull(referent), queue);
+ // must calculate it here, as later the referent may be GCed
+ this.hash = referent.hashCode();
+ }
+
+ @Override
+ public int hashCode() {
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (!(obj instanceof WeakKey)) {
+ return false;
+ }
+
+ String a = this.get();
+ String b = ((WeakKey) obj).get();
+ // In ConcurrentHashMap, we will always compare references(like
entry.key == key) before
+ // calling actual equals method, so this will not cause problems for
clean up. And in normal
+ // intern path, the reference will never be null, so there is no problem
too.
+ if (a == null || b == null) {
+ return false;
+ }
+ return a.equals(b);
+ }
+ }
+
+ private final ConcurrentHashMap<WeakKey, WeakReference<String>> map = new
ConcurrentHashMap<>();
+
+ private final ReferenceQueue<String> refQueue = new ReferenceQueue<>();
+
+ private final Lock cleanupLock = new ReentrantLock();
+
+ // only call cleanup every 256 times
+ private static final int CLEANUP_MASK = 0xFF;
+ private final AtomicInteger counter = new AtomicInteger();
+
+ public String intern(String s) {
+ Preconditions.checkNotNull(s);
+ maybeCleanup();
+
+ WeakKey lookupKey = new WeakKey(s, null);
+ WeakReference<String> ref = map.get(lookupKey);
+ if (ref != null) {
+ String v = ref.get();
+ if (v != null) {
+ return v;
+ }
+ }
+
+ WeakKey storeKey = new WeakKey(s, refQueue);
+ WeakReference<String> storeVal = new WeakReference<>(s);
+ // Used to store the return value. The return value of compute method is a
WeakReference, the
+ // value of the WeakReference may be GCed just before we get it for
returning.
+ MutableObject<String> ret = new MutableObject<>();
+
+ map.compute(storeKey, (k, prevVal) -> {
+ if (prevVal == null) {
+ ret.setValue(s);
+ return storeVal;
+ } else {
+ String prevRef = prevVal.get();
+ if (prevRef != null) {
+ ret.setValue(prevRef);
+ return prevVal;
+ } else {
+ ret.setValue(s);
+ return storeVal;
+ }
+ }
+ });
+ assert ret.get() != null;
+ return ret.get();
+ }
+
+ private void cleanup() {
+ if (!cleanupLock.tryLock()) {
+ // a cleanup task is ongoing, give up
+ return;
+ }
+ try {
+ for (;;) {
+ WeakKey k = (WeakKey) refQueue.poll();
+ if (k == null) {
+ return;
+ }
+ map.remove(k);
+ }
+ } finally {
+ cleanupLock.unlock();
+ }
+ }
+
+ private void maybeCleanup() {
+ if ((counter.incrementAndGet() & CLEANUP_MASK) != 0) {
+ return;
+ }
+ cleanup();
+ }
+
+ public int size() {
+ // size method is not on critical path, so always call cleanup here to
reduce memory pressure
+ cleanup();
+ return map.size();
+ }
+}
diff --git
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
new file mode 100644
index 00000000000..dd0a8339334
--- /dev/null
+++
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Tag(SmallTests.TAG)
+public class TestFastStringPool {
+
+ private static final Logger LOG =
LoggerFactory.getLogger(TestFastStringPool.class);
+
+ @Test
+ public void testMultiThread() throws InterruptedException {
+ FastStringPool pool = new FastStringPool();
+ List<String> list1 = new ArrayList<>();
+ List<String> list2 = new ArrayList<>();
+ for (int i = 0; i < 1000; i++) {
+ list1.add("list1-" + i);
+ list2.add("list2-" + i);
+ }
+ Map<String, String> interned1 = new HashMap<>();
+ Map<String, String> interned2 = new HashMap<>();
+ AtomicBoolean failed = new AtomicBoolean(false);
+ int numThreads = 10;
+ List<Thread> threads = new ArrayList<>();
+ for (int i = 0; i < numThreads; i++) {
+ threads.add(new Thread(() -> {
+ ThreadLocalRandom rand = ThreadLocalRandom.current();
+ for (int j = 0; j < 1000000; j++) {
+ List<String> list;
+ Map<String, String> interned;
+ if (rand.nextBoolean()) {
+ list = list1;
+ interned = interned1;
+ } else {
+ list = list2;
+ interned = interned2;
+ }
+ // create a new reference
+ String k = new String(list.get(rand.nextInt(list.size())));
+ String v = pool.intern(k);
+ synchronized (interned) {
+ String prev = interned.get(k);
+ if (prev != null) {
+ // should always return the same reference
+ if (prev != v) {
+ failed.set(true);
+ String msg = "reference not equal, intern failed on string " +
k;
+ LOG.error(msg);
+ throw new AssertionError(msg);
+ }
+ } else {
+ interned.put(k, v);
+ }
+ }
+ }
+ }));
+ }
+ for (Thread t : threads) {
+ t.start();
+ }
+ for (Thread t : threads) {
+ t.join();
+ }
+ LOG.info("interned1 size {}, interned2 size {}, pool size {}",
interned1.size(),
+ interned2.size(), pool.size());
+ assertEquals(interned1.size() + interned2.size(), pool.size());
+ interned1.clear();
+ list1.clear();
+ LOG.info("clear interned1");
+ // wait for at most 30 times
+ for (int i = 0; i < 30; i++) {
+ // invoke gc manually
+ LOG.info("trigger GC");
+ System.gc();
+ Thread.sleep(1000);
+ // should have cleaned up all the references for list1
+ if (interned2.size() == pool.size()) {
+ return;
+ }
+ }
+ fail("should only have list2 strings in pool, expected pool size " +
interned2.size()
+ + ", but got " + pool.size());
+ }
+}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index 2142de7053f..254f0adbd5f 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -19,8 +19,8 @@ package org.apache.hadoop.hbase.io.hfile;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.hfile.bucket.FilePathStringPool;
import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.FastStringPool;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.yetus.audience.InterfaceAudience;
@@ -29,17 +29,22 @@ import org.apache.yetus.audience.InterfaceAudience;
*/
@InterfaceAudience.Private
public class BlockCacheKey implements HeapSize, java.io.Serializable {
- private static final long serialVersionUID = -5199992013113130535L; //
Changed due to format
- // change
+ // Changed due to format change
+ private static final long serialVersionUID = -5199992013113130535L;
+
+ private static final FastStringPool HFILE_NAME_POOL = new FastStringPool();
+
+ private static final FastStringPool REGION_NAME_POOL = new FastStringPool();
+
+ private static final FastStringPool CF_NAME_POOL = new FastStringPool();
// New compressed format using integer file ID (when codec is available)
- private final int hfileNameId;
- private transient final FilePathStringPool stringPool;
+ private final String hfileName;
- private final int regionId;
+ private final String regionName;
- private final int cfId;
+ private final String cfName;
private final long offset;
@@ -91,11 +96,10 @@ public class BlockCacheKey implements HeapSize,
java.io.Serializable {
this.isPrimaryReplicaBlock = isPrimaryReplica;
this.offset = offset;
this.blockType = blockType;
- this.stringPool = FilePathStringPool.getInstance();
// Use string pool for file, region and cf values
- this.hfileNameId = stringPool.encode(hfileName);
- this.regionId = (regionName != null) ? stringPool.encode(regionName) : -1;
- this.cfId = (cfName != null) ? stringPool.encode(cfName) : -1;
+ this.hfileName = HFILE_NAME_POOL.intern(hfileName);
+ this.regionName = (regionName != null) ?
REGION_NAME_POOL.intern(regionName) : null;
+ this.cfName = (cfName != null) ? CF_NAME_POOL.intern(cfName) : null;
this.archived = archived;
}
@@ -116,7 +120,7 @@ public class BlockCacheKey implements HeapSize,
java.io.Serializable {
@Override
public int hashCode() {
- return hfileNameId * 127 + (int) (offset ^ (offset >>> 32));
+ return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
}
@Override
@@ -152,7 +156,7 @@ public class BlockCacheKey implements HeapSize,
java.io.Serializable {
* @return The file name
*/
public String getHfileName() {
- return stringPool.decode(hfileNameId);
+ return hfileName;
}
/**
@@ -160,7 +164,7 @@ public class BlockCacheKey implements HeapSize,
java.io.Serializable {
* @return The region name
*/
public String getRegionName() {
- return stringPool.decode(regionId);
+ return regionName;
}
/**
@@ -168,7 +172,7 @@ public class BlockCacheKey implements HeapSize,
java.io.Serializable {
* @return The column family name
*/
public String getCfName() {
- return stringPool.decode(cfId);
+ return cfName;
}
public boolean isPrimary() {
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 493a8c0b01d..caae3efda54 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -397,7 +397,6 @@ public class BucketCache implements BlockCache, HeapSize {
LOG.warn("Can't restore from file[{}]. The bucket cache will be reset
and rebuilt."
+ " Exception seen: ", persistencePath, ex);
backingMap.clear();
- FilePathStringPool.getInstance().clear();
fullyCachedFiles.clear();
backingMapValidated.set(true);
regionCachedSize.clear();
@@ -780,7 +779,6 @@ public class BucketCache implements BlockCache, HeapSize {
// remove the entry for that region from regionCachedSize map.
if (regionCachedSize.get(regionName) <= 0) {
regionCachedSize.remove(regionName);
- FilePathStringPool.getInstance().remove(regionName);
}
}
}
@@ -1005,7 +1003,6 @@ public class BucketCache implements BlockCache, HeapSize {
+ cacheStats.getEvictedCount() + ", " + "evictedPerRun=" +
cacheStats.evictedPerEviction()
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() +
", blocksCount="
+ backingMap.size());
- LOG.info(FilePathStringPool.getInstance().getPoolStats());
cacheStats.reset();
bucketAllocator.logDebugStatistics();
@@ -1750,7 +1747,6 @@ public class BucketCache implements BlockCache, HeapSize {
}
private void retrieveChunkedBackingMap(FileInputStream in) throws
IOException {
-
// Read the first chunk that has all the details.
BucketCacheProtos.BucketCacheEntry cacheEntry =
BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
@@ -1760,7 +1756,6 @@ public class BucketCache implements BlockCache, HeapSize {
backingMap.clear();
blocksByHFile.clear();
- FilePathStringPool.getInstance().clear();
// Read the backing map entries in batches.
int numChunks = 0;
@@ -1816,7 +1811,6 @@ public class BucketCache implements BlockCache, HeapSize {
this.blocksByHFile.clear();
this.fullyCachedFiles.clear();
this.regionCachedSize.clear();
- FilePathStringPool.getInstance().clear();
}
if (cacheStats.getMetricsRollerScheduler() != null) {
cacheStats.getMetricsRollerScheduler().shutdownNow();
@@ -1845,7 +1839,6 @@ public class BucketCache implements BlockCache, HeapSize {
}
}
persistToFile();
- FilePathStringPool.getInstance().clear();
} catch (IOException ex) {
LOG.error("Unable to persist data on exit: " + ex.toString(), ex);
} catch (InterruptedException e) {
@@ -1936,9 +1929,6 @@ public class BucketCache implements BlockCache, HeapSize {
Set<BlockCacheKey> keySet = getAllCacheKeysForFile(hfileName, initOffset,
endOffset);
// We need to make sure whether we are evicting all blocks for this given
file. In case of
// split references, we might be evicting just half of the blocks
- int totalFileKeys = (endOffset == Long.MAX_VALUE)
- ? keySet.size()
- : getAllCacheKeysForFile(hfileName, 0, Long.MAX_VALUE).size();
LOG.debug("found {} blocks for file {}, starting offset: {}, end offset:
{}", keySet.size(),
hfileName, initOffset, endOffset);
int numEvicted = 0;
@@ -1947,11 +1937,6 @@ public class BucketCache implements BlockCache, HeapSize
{
++numEvicted;
}
}
- if (numEvicted > 0) {
- if (totalFileKeys == numEvicted) {
- FilePathStringPool.getInstance().remove(hfileName);
- }
- }
return numEvicted;
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
deleted file mode 100644
index 7e08158af0e..00000000000
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Pool of string values encoded to integer IDs for use in BlockCacheKey. This
allows for avoiding
- * duplicating string values for file names, region and CF values on various
BlockCacheKey
- * instances. Normally, single hfiles have many blocks. This means all blocks
from the same file
- * will have the very same file, region and CF names. On very large
BucketCache setups (i.e. file
- * based cache with TB size order), can save few GBs of memory by avoiding
repeating these common
- * string values on blocks from the same file. The FilePathStringPool is
implemented as a singleton,
- * since the same pool should be shared by all BlockCacheKey instances, as
well as the BucketCache
- * object itself. The Id for an encoded string is an integer. Any new String
added to the pool is
- * assigned the next available integer ID, starting from 0 upwards. That sets
the total pool
- * capacity to Integer.MAX_VALUE. In the event of ID exhaustion (integer
overflow when Id values
- * reach Integer.MAX_VALUE), the encode() method will restart iterating over
int values
- * incrementally from 0 until it finds an unused ID. Strings can be removed
from the pool using the
- * remove() method. BucketCache should call this when evicting all blocks for
a given file (see
- * BucketCache.evictFileBlocksFromCache()).
- * <p>
- * Thread-safe implementation that maintains bidirectional mappings between
strings and IDs.
- * </p>
- */
[email protected]
-public class FilePathStringPool {
- private static final Logger LOG =
LoggerFactory.getLogger(FilePathStringPool.class);
-
- // Bidirectional mappings for string objects re-use
- private final ConcurrentHashMap<String, Integer> stringToId = new
ConcurrentHashMap<>();
- private final ConcurrentHashMap<Integer, String> idToString = new
ConcurrentHashMap<>();
- private final AtomicInteger nextId = new AtomicInteger(0);
-
- private static FilePathStringPool instance;
-
- public static FilePathStringPool getInstance() {
- synchronized (FilePathStringPool.class) {
- if (instance == null) {
- instance = new FilePathStringPool();
- }
- }
- return instance;
- }
-
- private FilePathStringPool() {
- // Private constructor for singleton
- }
-
- /**
- * Gets or creates an integer ID for the given String.
- * @param string value for the file/region/CF name.
- * @return the integer ID encoding this string in the pool.
- */
- public int encode(String string) {
- if (string == null) {
- throw new IllegalArgumentException("string cannot be null");
- }
- return stringToId.computeIfAbsent(string, name -> {
- if (stringToId.size() == Integer.MAX_VALUE) {
- throw new IllegalStateException(
- "String pool has reached maximum capacity of " + Integer.MAX_VALUE +
" unique strings.");
- }
- int id = nextId.getAndIncrement();
- while (idToString.containsKey(id)) {
- id = nextId.getAndIncrement();
- if (id == Integer.MAX_VALUE) {
- nextId.set(0);
- LOG.info("Id values reached Integer.MAX_VALUE, restarting from 0");
- }
- }
- idToString.put(id, name);
- LOG.trace("Encoded new string to ID {}: {}", id, name);
- return id;
- });
- }
-
- /**
- * Decodes an integer ID back to its original file name.
- * @param id the integer ID
- * @return the original file name, or null if not found
- */
- public String decode(int id) {
- return idToString.get(id);
- }
-
- /**
- * Checks if a given string ID is already being used.
- * @param id the integer ID to check
- * @return true if the ID exists
- */
- public boolean contains(int id) {
- return idToString.containsKey(id);
- }
-
- /**
- * Checks if a given string has been encoded.
- * @param string the value to check
- * @return true if the string value has been encoded
- */
- public boolean contains(String string) {
- return stringToId.containsKey(string);
- }
-
- /**
- * Gets the number of unique file names currently tracked.
- * @return the number of entries in the codec
- */
- public int size() {
- return stringToId.size();
- }
-
- /**
- * Removes a string value and its ID from the pool. This should only be
called when all blocks for
- * a file have been evicted from the cache.
- * @param string the file name to remove
- * @return true if the file name was removed, false if it wasn't present
- */
- public boolean remove(String string) {
- if (string == null) {
- return false;
- }
- Integer id = stringToId.remove(string);
- if (id != null) {
- idToString.remove(id);
- LOG.debug("Removed string value from pool: {} (ID: {})", string, id);
- return true;
- }
- return false;
- }
-
- /**
- * Clears all mappings from the codec.
- */
- public void clear() {
- stringToId.clear();
- idToString.clear();
- nextId.set(0);
- LOG.info("Cleared all file name mappings from codec");
- }
-
- /**
- * Gets statistics about memory savings from string pooling.
- * @return a formatted string with compression statistics
- */
- public String getPoolStats() {
- long uniqueStrings = stringToId.size();
- if (uniqueStrings == 0) {
- return "No strings encoded";
- }
- // Calculate average string length
- long totalChars =
stringToId.keySet().stream().mapToLong(String::length).sum();
- double avgLength = (double) totalChars / uniqueStrings;
- return String.format("FilePathStringPool stats: %d unique strings, avg
length: %.1f chars, ",
- uniqueStrings, avgLength);
- }
-}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index f588eb3a7d0..467c7483398 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -49,7 +49,6 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.LongAdder;
@@ -681,37 +680,6 @@ public class TestBucketCache {
assertEquals(1, cache.getStats().getEvictionCount());
}
- @Test
- public void testStringPool() throws Exception {
- HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
- Path testDir = TEST_UTIL.getDataTestDir();
- TEST_UTIL.getTestFileSystem().mkdirs(testDir);
- BucketCache bucketCache =
- new BucketCache("file:" + testDir + "/bucket.cache", capacitySize,
constructedBlockSize,
- constructedBlockSizes, writeThreads, writerQLen, testDir +
"/bucket.persistence");
- assertTrue(bucketCache.waitForCacheInitialization(10000));
- long usedSize = bucketCache.getAllocator().getUsedSize();
- assertEquals(0, usedSize);
- Random rand = ThreadLocalRandom.current();
- Path filePath = new Path(testDir, Long.toString(rand.nextLong()));
- CacheTestUtils.HFileBlockPair[] blocks =
- CacheTestUtils.generateBlocksForPath(constructedBlockSize, 1, filePath,
false);
- String name = blocks[0].getBlockName().getHfileName();
- assertEquals(name, filePath.getName());
- assertNotNull(blocks[0].getBlockName().getRegionName());
- bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
- waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
- assertTrue(FilePathStringPool.getInstance().size() > 0);
- bucketCache.evictBlock(blocks[0].getBlockName());
- assertTrue(FilePathStringPool.getInstance().size() > 0);
- bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
- waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
- bucketCache.fileCacheCompleted(filePath,
- bucketCache.backingMap.get(blocks[0].getBlockName()).getLength());
- bucketCache.evictBlocksByHfileName(name);
- assertEquals(1, FilePathStringPool.getInstance().size());
- }
-
@Test
public void testCacheBlockNextBlockMetadataMissing() throws Exception {
int size = 100;
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
deleted file mode 100644
index a42a61c93fb..00000000000
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Before;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Tests for {@link FilePathStringPool}
- */
-@Category({ SmallTests.class })
-public class TestFilePathStringPool {
-
- @ClassRule
- public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestFilePathStringPool.class);
-
- private FilePathStringPool pool;
-
- @Before
- public void setUp() {
- pool = FilePathStringPool.getInstance();
- pool.clear();
- }
-
- @Test
- public void testSingletonPattern() {
- FilePathStringPool instance1 = FilePathStringPool.getInstance();
- FilePathStringPool instance2 = FilePathStringPool.getInstance();
- assertNotNull(instance1);
- assertNotNull(instance2);
- assertEquals(instance1, instance2);
- }
-
- @Test
- public void testBasicEncodeDecodeRoundTrip() {
- String testString =
"/hbase/data/default/test-table/region1/cf1/file1.hfile";
- int id = pool.encode(testString);
- String decoded = pool.decode(id);
- assertEquals(testString, decoded);
- }
-
- @Test
- public void testEncodeReturnsSameIdForSameString() {
- String testString = "/hbase/data/file.hfile";
- int id1 = pool.encode(testString);
- int id2 = pool.encode(testString);
- assertEquals(id1, id2);
- assertEquals(1, pool.size());
- }
-
- @Test
- public void testEncodeDifferentStringsGetDifferentIds() {
- String string1 = "/path/to/file1.hfile";
- String string2 = "/path/to/file2.hfile";
- int id1 = pool.encode(string1);
- int id2 = pool.encode(string2);
- assertNotEquals(id1, id2);
- assertEquals(2, pool.size());
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testEncodeNullStringThrowsException() {
- pool.encode(null);
- }
-
- @Test
- public void testDecodeNonExistentIdReturnsNull() {
- String decoded = pool.decode(999999);
- assertNull(decoded);
- }
-
- @Test
- public void testContainsWithId() {
- String testString = "/hbase/file.hfile";
- int id = pool.encode(testString);
- assertTrue(pool.contains(id));
- assertFalse(pool.contains(id + 1));
- }
-
- @Test
- public void testContainsWithString() {
- String testString = "/hbase/file.hfile";
- pool.encode(testString);
- assertTrue(pool.contains(testString));
- assertFalse(pool.contains("/hbase/other-file.hfile"));
- }
-
- @Test
- public void testRemoveExistingString() {
- String testString = "/hbase/file.hfile";
- int id = pool.encode(testString);
- assertEquals(1, pool.size());
- assertTrue(pool.contains(testString));
- boolean removed = pool.remove(testString);
- assertTrue(removed);
- assertEquals(0, pool.size());
- assertFalse(pool.contains(testString));
- assertFalse(pool.contains(id));
- assertNull(pool.decode(id));
- }
-
- @Test
- public void testRemoveNonExistentStringReturnsFalse() {
- boolean removed = pool.remove("/non/existent/file.hfile");
- assertFalse(removed);
- }
-
- @Test
- public void testRemoveNullStringReturnsFalse() {
- boolean removed = pool.remove(null);
- assertFalse(removed);
- }
-
- @Test
- public void testClear() {
- pool.encode("/file1.hfile");
- pool.encode("/file2.hfile");
- pool.encode("/file3.hfile");
- assertEquals(3, pool.size());
- pool.clear();
- assertEquals(0, pool.size());
- }
-
- @Test
- public void testSizeTracking() {
- assertEquals(0, pool.size());
- pool.encode("/file1.hfile");
- assertEquals(1, pool.size());
- pool.encode("/file2.hfile");
- assertEquals(2, pool.size());
- // Encoding same string should not increase size
- pool.encode("/file1.hfile");
- assertEquals(2, pool.size());
- pool.remove("/file1.hfile");
- assertEquals(1, pool.size());
- pool.clear();
- assertEquals(0, pool.size());
- }
-
- @Test
- public void testGetPoolStats() {
- String stats = pool.getPoolStats();
- assertEquals("No strings encoded", stats);
- pool.encode("/hbase/data/table1/file1.hfile");
- pool.encode("/hbase/data/table2/file2.hfile");
- stats = pool.getPoolStats();
- assertNotNull(stats);
- assertTrue(stats.contains("2 unique strings"));
- assertTrue(stats.contains("avg length:"));
- }
-
- @Test
- public void testConcurrentEncoding() throws InterruptedException {
- int numThreads = 10;
- int stringsPerThread = 100;
- ExecutorService executor = Executors.newFixedThreadPool(numThreads);
- CountDownLatch doneLatch = new CountDownLatch(numThreads);
- ConcurrentHashMap<String, Integer> results = new ConcurrentHashMap<>();
- AtomicInteger errorCount = new AtomicInteger(0);
-
- for (int t = 0; t < numThreads; t++) {
- final int threadId = t;
- executor.submit(() -> {
- try {
- for (int i = 0; i < stringsPerThread; i++) {
- String string = "/thread" + threadId + "/file" + i + ".hfile";
- int id = pool.encode(string);
- results.put(string, id);
- }
- } catch (Exception e) {
- errorCount.incrementAndGet();
- } finally {
- doneLatch.countDown();
- }
- });
- }
-
- assertTrue(doneLatch.await(30, TimeUnit.SECONDS));
- executor.shutdown();
-
- assertEquals(0, errorCount.get());
- assertEquals(numThreads * stringsPerThread, pool.size());
- assertEquals(numThreads * stringsPerThread, results.size());
-
- // Verify all strings can be decoded correctly
- for (Map.Entry<String, Integer> entry : results.entrySet()) {
- String decoded = pool.decode(entry.getValue());
- assertEquals(entry.getKey(), decoded);
- }
- }
-
- @Test
- public void testConcurrentEncodingSameStrings() throws InterruptedException {
- int numThreads = 20;
- String sharedString = "/shared/file.hfile";
- ExecutorService executor = Executors.newFixedThreadPool(numThreads);
- CountDownLatch doneLatch = new CountDownLatch(numThreads);
- Set<Integer> ids = ConcurrentHashMap.newKeySet();
- AtomicInteger errorCount = new AtomicInteger(0);
-
- for (int i = 0; i < numThreads; i++) {
- executor.submit(() -> {
- try {
- int id = pool.encode(sharedString);
- ids.add(id);
- } catch (Exception e) {
- errorCount.incrementAndGet();
- } finally {
- doneLatch.countDown();
- }
- });
- }
-
- doneLatch.await(10, TimeUnit.SECONDS);
- executor.shutdown();
-
- assertEquals(0, errorCount.get());
- // All threads should get the same ID
- assertEquals(1, ids.size());
- assertEquals(1, pool.size());
- }
-
- @Test
- public void testConcurrentRemoval() throws InterruptedException {
- // Pre-populate with strings
- List<String> strings = new ArrayList<>();
- for (int i = 0; i < 100; i++) {
- String string = "/file" + i + ".hfile";
- strings.add(string);
- pool.encode(string);
- }
- assertEquals(100, pool.size());
-
- int numThreads = 10;
- ExecutorService executor = Executors.newFixedThreadPool(numThreads);
- CountDownLatch doneLatch = new CountDownLatch(numThreads);
- AtomicInteger successfulRemovals = new AtomicInteger(0);
-
- for (int t = 0; t < numThreads; t++) {
- final int threadId = t;
- executor.submit(() -> {
- try {
- for (int i = threadId * 10; i < (threadId + 1) * 10; i++) {
- if (pool.remove(strings.get(i))) {
- successfulRemovals.incrementAndGet();
- }
- }
- } catch (Exception e) {
- // Ignore
- } finally {
- doneLatch.countDown();
- }
- });
- }
-
- doneLatch.await(10, TimeUnit.SECONDS);
- executor.shutdown();
-
- assertEquals(100, successfulRemovals.get());
- assertEquals(0, pool.size());
- }
-
- @Test
- public void testBidirectionalMappingConsistency() {
- // Verify that both mappings stay consistent
- List<String> strings = new ArrayList<>();
- List<Integer> ids = new ArrayList<>();
-
- for (int i = 0; i < 50; i++) {
- String string = "/region" + (i % 5) + "/file" + i + ".hfile";
- strings.add(string);
- ids.add(pool.encode(string));
- }
-
- // Verify forward mapping (string -> id)
- for (int i = 0; i < strings.size(); i++) {
- int expectedId = ids.get(i);
- int actualId = pool.encode(strings.get(i));
- assertEquals(expectedId, actualId);
- }
-
- // Verify reverse mapping (id -> string)
- for (int i = 0; i < ids.size(); i++) {
- String expectedString = strings.get(i);
- String actualString = pool.decode(ids.get(i));
- assertEquals(expectedString, actualString);
- }
- }
-}