This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a3e371ca0a HBASE-29795 RegionServer abort because of NPE when closing 
compacted store file (#7576)
1a3e371ca0a is described below

commit 1a3e371ca0a9c5cc3a884cb6121a4c8769f25a70
Author: Duo Zhang <[email protected]>
AuthorDate: Sun Dec 28 22:22:55 2025 +0800

    HBASE-29795 RegionServer abort because of NPE when closing compacted store 
file (#7576)
    
    Signed-off-by: Wellington Ramos Chevreuil <[email protected]>
---
 .../apache/hadoop/hbase/util/FastStringPool.java   | 158 ++++++++++
 .../hadoop/hbase/util/TestFastStringPool.java      | 113 +++++++
 .../hadoop/hbase/io/hfile/BlockCacheKey.java       |  34 ++-
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |  15 -
 .../hbase/io/hfile/bucket/FilePathStringPool.java  | 176 -----------
 .../hbase/io/hfile/bucket/TestBucketCache.java     |  32 --
 .../io/hfile/bucket/TestFilePathStringPool.java    | 326 ---------------------
 7 files changed, 290 insertions(+), 564 deletions(-)

diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
new file mode 100644
index 00000000000..a5f0dc17bcc
--- /dev/null
+++ 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+
+/**
+ * A string pool like {@link String#intern()}, but more flexible as we can 
create multiple instances
+ * and use them in difference places, where {@link String#intern()} is global.
+ * <p>
+ * We use {@link WeakReference} so when there are no actual reference to the 
String, it will be GCed
+ * to reduce memory pressure.
+ * <p>
+ * The difference between {@link WeakObjectPool} is that, we also need to use 
{@link WeakReference}
+ * as key, not only value, because the key(a String) is exactly what we want 
to deduplicate.
+ */
[email protected]
+public class FastStringPool {
+
+  private static final class WeakKey extends WeakReference<String> {
+
+    private final int hash;
+
+    WeakKey(String referent, ReferenceQueue<String> queue) {
+      super(Preconditions.checkNotNull(referent), queue);
+      // must calculate it here, as later the referent may be GCed
+      this.hash = referent.hashCode();
+    }
+
+    @Override
+    public int hashCode() {
+      return hash;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (!(obj instanceof WeakKey)) {
+        return false;
+      }
+
+      String a = this.get();
+      String b = ((WeakKey) obj).get();
+      // In ConcurrentHashMap, we will always compare references(like 
entry.key == key) before
+      // calling actual equals method, so this will not cause problems for 
clean up. And in normal
+      // intern path, the reference will never be null, so there is no problem 
too.
+      if (a == null || b == null) {
+        return false;
+      }
+      return a.equals(b);
+    }
+  }
+
+  private final ConcurrentHashMap<WeakKey, WeakReference<String>> map = new 
ConcurrentHashMap<>();
+
+  private final ReferenceQueue<String> refQueue = new ReferenceQueue<>();
+
+  private final Lock cleanupLock = new ReentrantLock();
+
+  // only call cleanup every 256 times
+  private static final int CLEANUP_MASK = 0xFF;
+  private final AtomicInteger counter = new AtomicInteger();
+
+  public String intern(String s) {
+    Preconditions.checkNotNull(s);
+    maybeCleanup();
+
+    WeakKey lookupKey = new WeakKey(s, null);
+    WeakReference<String> ref = map.get(lookupKey);
+    if (ref != null) {
+      String v = ref.get();
+      if (v != null) {
+        return v;
+      }
+    }
+
+    WeakKey storeKey = new WeakKey(s, refQueue);
+    WeakReference<String> storeVal = new WeakReference<>(s);
+    // Used to store the return value. The return value of compute method is a 
WeakReference, the
+    // value of the WeakReference may be GCed just before we get it for 
returning.
+    MutableObject<String> ret = new MutableObject<>();
+
+    map.compute(storeKey, (k, prevVal) -> {
+      if (prevVal == null) {
+        ret.setValue(s);
+        return storeVal;
+      } else {
+        String prevRef = prevVal.get();
+        if (prevRef != null) {
+          ret.setValue(prevRef);
+          return prevVal;
+        } else {
+          ret.setValue(s);
+          return storeVal;
+        }
+      }
+    });
+    assert ret.get() != null;
+    return ret.get();
+  }
+
+  private void cleanup() {
+    if (!cleanupLock.tryLock()) {
+      // a cleanup task is ongoing, give up
+      return;
+    }
+    try {
+      for (;;) {
+        WeakKey k = (WeakKey) refQueue.poll();
+        if (k == null) {
+          return;
+        }
+        map.remove(k);
+      }
+    } finally {
+      cleanupLock.unlock();
+    }
+  }
+
+  private void maybeCleanup() {
+    if ((counter.incrementAndGet() & CLEANUP_MASK) != 0) {
+      return;
+    }
+    cleanup();
+  }
+
+  public int size() {
+    // size method is not on critical path, so always call cleanup here to 
reduce memory pressure
+    cleanup();
+    return map.size();
+  }
+}
diff --git 
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
new file mode 100644
index 00000000000..dd0a8339334
--- /dev/null
+++ 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Tag(SmallTests.TAG)
+public class TestFastStringPool {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(TestFastStringPool.class);
+
+  @Test
+  public void testMultiThread() throws InterruptedException {
+    FastStringPool pool = new FastStringPool();
+    List<String> list1 = new ArrayList<>();
+    List<String> list2 = new ArrayList<>();
+    for (int i = 0; i < 1000; i++) {
+      list1.add("list1-" + i);
+      list2.add("list2-" + i);
+    }
+    Map<String, String> interned1 = new HashMap<>();
+    Map<String, String> interned2 = new HashMap<>();
+    AtomicBoolean failed = new AtomicBoolean(false);
+    int numThreads = 10;
+    List<Thread> threads = new ArrayList<>();
+    for (int i = 0; i < numThreads; i++) {
+      threads.add(new Thread(() -> {
+        ThreadLocalRandom rand = ThreadLocalRandom.current();
+        for (int j = 0; j < 1000000; j++) {
+          List<String> list;
+          Map<String, String> interned;
+          if (rand.nextBoolean()) {
+            list = list1;
+            interned = interned1;
+          } else {
+            list = list2;
+            interned = interned2;
+          }
+          // create a new reference
+          String k = new String(list.get(rand.nextInt(list.size())));
+          String v = pool.intern(k);
+          synchronized (interned) {
+            String prev = interned.get(k);
+            if (prev != null) {
+              // should always return the same reference
+              if (prev != v) {
+                failed.set(true);
+                String msg = "reference not equal, intern failed on string " + 
k;
+                LOG.error(msg);
+                throw new AssertionError(msg);
+              }
+            } else {
+              interned.put(k, v);
+            }
+          }
+        }
+      }));
+    }
+    for (Thread t : threads) {
+      t.start();
+    }
+    for (Thread t : threads) {
+      t.join();
+    }
+    LOG.info("interned1 size {}, interned2 size {}, pool size {}", 
interned1.size(),
+      interned2.size(), pool.size());
+    assertEquals(interned1.size() + interned2.size(), pool.size());
+    interned1.clear();
+    list1.clear();
+    LOG.info("clear interned1");
+    // wait for at most 30 times
+    for (int i = 0; i < 30; i++) {
+      // invoke gc manually
+      LOG.info("trigger GC");
+      System.gc();
+      Thread.sleep(1000);
+      // should have cleaned up all the references for list1
+      if (interned2.size() == pool.size()) {
+        return;
+      }
+    }
+    fail("should only have list2 strings in pool, expected pool size " + 
interned2.size()
+      + ", but got " + pool.size());
+  }
+}
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index 2142de7053f..254f0adbd5f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -19,8 +19,8 @@ package org.apache.hadoop.hbase.io.hfile;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.hfile.bucket.FilePathStringPool;
 import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.FastStringPool;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -29,17 +29,22 @@ import org.apache.yetus.audience.InterfaceAudience;
  */
 @InterfaceAudience.Private
 public class BlockCacheKey implements HeapSize, java.io.Serializable {
-  private static final long serialVersionUID = -5199992013113130535L; // 
Changed due to format
-                                                                      // change
+  // Changed due to format change
+  private static final long serialVersionUID = -5199992013113130535L;
+
+  private static final FastStringPool HFILE_NAME_POOL = new FastStringPool();
+
+  private static final FastStringPool REGION_NAME_POOL = new FastStringPool();
+
+  private static final FastStringPool CF_NAME_POOL = new FastStringPool();
 
   // New compressed format using integer file ID (when codec is available)
-  private final int hfileNameId;
 
-  private transient final FilePathStringPool stringPool;
+  private final String hfileName;
 
-  private final int regionId;
+  private final String regionName;
 
-  private final int cfId;
+  private final String cfName;
 
   private final long offset;
 
@@ -91,11 +96,10 @@ public class BlockCacheKey implements HeapSize, 
java.io.Serializable {
     this.isPrimaryReplicaBlock = isPrimaryReplica;
     this.offset = offset;
     this.blockType = blockType;
-    this.stringPool = FilePathStringPool.getInstance();
     // Use string pool for file, region and cf values
-    this.hfileNameId = stringPool.encode(hfileName);
-    this.regionId = (regionName != null) ? stringPool.encode(regionName) : -1;
-    this.cfId = (cfName != null) ? stringPool.encode(cfName) : -1;
+    this.hfileName = HFILE_NAME_POOL.intern(hfileName);
+    this.regionName = (regionName != null) ? 
REGION_NAME_POOL.intern(regionName) : null;
+    this.cfName = (cfName != null) ? CF_NAME_POOL.intern(cfName) : null;
     this.archived = archived;
   }
 
@@ -116,7 +120,7 @@ public class BlockCacheKey implements HeapSize, 
java.io.Serializable {
 
   @Override
   public int hashCode() {
-    return hfileNameId * 127 + (int) (offset ^ (offset >>> 32));
+    return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
   }
 
   @Override
@@ -152,7 +156,7 @@ public class BlockCacheKey implements HeapSize, 
java.io.Serializable {
    * @return The file name
    */
   public String getHfileName() {
-    return stringPool.decode(hfileNameId);
+    return hfileName;
   }
 
   /**
@@ -160,7 +164,7 @@ public class BlockCacheKey implements HeapSize, 
java.io.Serializable {
    * @return The region name
    */
   public String getRegionName() {
-    return stringPool.decode(regionId);
+    return regionName;
   }
 
   /**
@@ -168,7 +172,7 @@ public class BlockCacheKey implements HeapSize, 
java.io.Serializable {
    * @return The column family name
    */
   public String getCfName() {
-    return stringPool.decode(cfId);
+    return cfName;
   }
 
   public boolean isPrimary() {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index a65d0fd9895..60ba9f32cd7 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -410,7 +410,6 @@ public class BucketCache implements BlockCache, HeapSize {
         LOG.warn("Can't restore from file[{}]. The bucket cache will be reset 
and rebuilt."
           + " Exception seen: ", persistencePath, ex);
         backingMap.clear();
-        FilePathStringPool.getInstance().clear();
         fullyCachedFiles.clear();
         backingMapValidated.set(true);
         regionCachedSize.clear();
@@ -793,7 +792,6 @@ public class BucketCache implements BlockCache, HeapSize {
         // remove the entry for that region from regionCachedSize map.
         if (regionCachedSize.get(regionName) <= 0) {
           regionCachedSize.remove(regionName);
-          FilePathStringPool.getInstance().remove(regionName);
         }
       }
     }
@@ -1013,7 +1011,6 @@ public class BucketCache implements BlockCache, HeapSize {
       + cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + 
cacheStats.evictedPerEviction()
       + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + 
", blocksCount="
       + backingMap.size());
-    LOG.info(FilePathStringPool.getInstance().getPoolStats());
     cacheStats.reset();
 
     bucketAllocator.logDebugStatistics();
@@ -1757,7 +1754,6 @@ public class BucketCache implements BlockCache, HeapSize {
   }
 
   private void retrieveChunkedBackingMap(FileInputStream in) throws 
IOException {
-
     // Read the first chunk that has all the details.
     BucketCacheProtos.BucketCacheEntry cacheEntry =
       BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
@@ -1767,7 +1763,6 @@ public class BucketCache implements BlockCache, HeapSize {
 
     backingMap.clear();
     blocksByHFile.clear();
-    FilePathStringPool.getInstance().clear();
 
     // Read the backing map entries in batches.
     int numChunks = 0;
@@ -1823,7 +1818,6 @@ public class BucketCache implements BlockCache, HeapSize {
       this.blocksByHFile.clear();
       this.fullyCachedFiles.clear();
       this.regionCachedSize.clear();
-      FilePathStringPool.getInstance().clear();
     }
     if (cacheStats.getMetricsRollerScheduler() != null) {
       cacheStats.getMetricsRollerScheduler().shutdownNow();
@@ -1852,7 +1846,6 @@ public class BucketCache implements BlockCache, HeapSize {
             }
           }
           persistToFile();
-          FilePathStringPool.getInstance().clear();
         } catch (IOException ex) {
           LOG.error("Unable to persist data on exit: " + ex.toString(), ex);
         } catch (InterruptedException e) {
@@ -1943,9 +1936,6 @@ public class BucketCache implements BlockCache, HeapSize {
     Set<BlockCacheKey> keySet = getAllCacheKeysForFile(hfileName, initOffset, 
endOffset);
     // We need to make sure whether we are evicting all blocks for this given 
file. In case of
     // split references, we might be evicting just half of the blocks
-    int totalFileKeys = (endOffset == Long.MAX_VALUE)
-      ? keySet.size()
-      : getAllCacheKeysForFile(hfileName, 0, Long.MAX_VALUE).size();
     LOG.debug("found {} blocks for file {}, starting offset: {}, end offset: 
{}", keySet.size(),
       hfileName, initOffset, endOffset);
     int numEvicted = 0;
@@ -1954,11 +1944,6 @@ public class BucketCache implements BlockCache, HeapSize 
{
         ++numEvicted;
       }
     }
-    if (numEvicted > 0) {
-      if (totalFileKeys == numEvicted) {
-        FilePathStringPool.getInstance().remove(hfileName);
-      }
-    }
     return numEvicted;
   }
 
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
deleted file mode 100644
index 7e08158af0e..00000000000
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Pool of string values encoded to integer IDs for use in BlockCacheKey. This 
allows for avoiding
- * duplicating string values for file names, region and CF values on various 
BlockCacheKey
- * instances. Normally, single hfiles have many blocks. This means all blocks 
from the same file
- * will have the very same file, region and CF names. On very large 
BucketCache setups (i.e. file
- * based cache with TB size order), can save few GBs of memory by avoiding 
repeating these common
- * string values on blocks from the same file. The FilePathStringPool is 
implemented as a singleton,
- * since the same pool should be shared by all BlockCacheKey instances, as 
well as the BucketCache
- * object itself. The Id for an encoded string is an integer. Any new String 
added to the pool is
- * assigned the next available integer ID, starting from 0 upwards. That sets 
the total pool
- * capacity to Integer.MAX_VALUE. In the event of ID exhaustion (integer 
overflow when Id values
- * reach Integer.MAX_VALUE), the encode() method will restart iterating over 
int values
- * incrementally from 0 until it finds an unused ID. Strings can be removed 
from the pool using the
- * remove() method. BucketCache should call this when evicting all blocks for 
a given file (see
- * BucketCache.evictFileBlocksFromCache()).
- * <p>
- * Thread-safe implementation that maintains bidirectional mappings between 
strings and IDs.
- * </p>
- */
[email protected]
-public class FilePathStringPool {
-  private static final Logger LOG = 
LoggerFactory.getLogger(FilePathStringPool.class);
-
-  // Bidirectional mappings for string objects re-use
-  private final ConcurrentHashMap<String, Integer> stringToId = new 
ConcurrentHashMap<>();
-  private final ConcurrentHashMap<Integer, String> idToString = new 
ConcurrentHashMap<>();
-  private final AtomicInteger nextId = new AtomicInteger(0);
-
-  private static FilePathStringPool instance;
-
-  public static FilePathStringPool getInstance() {
-    synchronized (FilePathStringPool.class) {
-      if (instance == null) {
-        instance = new FilePathStringPool();
-      }
-    }
-    return instance;
-  }
-
-  private FilePathStringPool() {
-    // Private constructor for singleton
-  }
-
-  /**
-   * Gets or creates an integer ID for the given String.
-   * @param string value for the file/region/CF name.
-   * @return the integer ID encoding this string in the pool.
-   */
-  public int encode(String string) {
-    if (string == null) {
-      throw new IllegalArgumentException("string cannot be null");
-    }
-    return stringToId.computeIfAbsent(string, name -> {
-      if (stringToId.size() == Integer.MAX_VALUE) {
-        throw new IllegalStateException(
-          "String pool has reached maximum capacity of " + Integer.MAX_VALUE + 
" unique strings.");
-      }
-      int id = nextId.getAndIncrement();
-      while (idToString.containsKey(id)) {
-        id = nextId.getAndIncrement();
-        if (id == Integer.MAX_VALUE) {
-          nextId.set(0);
-          LOG.info("Id values reached Integer.MAX_VALUE, restarting from 0");
-        }
-      }
-      idToString.put(id, name);
-      LOG.trace("Encoded new string to ID {}: {}", id, name);
-      return id;
-    });
-  }
-
-  /**
-   * Decodes an integer ID back to its original file name.
-   * @param id the integer ID
-   * @return the original file name, or null if not found
-   */
-  public String decode(int id) {
-    return idToString.get(id);
-  }
-
-  /**
-   * Checks if a given string ID is already being used.
-   * @param id the integer ID to check
-   * @return true if the ID exists
-   */
-  public boolean contains(int id) {
-    return idToString.containsKey(id);
-  }
-
-  /**
-   * Checks if a given string has been encoded.
-   * @param string the value to check
-   * @return true if the string value has been encoded
-   */
-  public boolean contains(String string) {
-    return stringToId.containsKey(string);
-  }
-
-  /**
-   * Gets the number of unique file names currently tracked.
-   * @return the number of entries in the codec
-   */
-  public int size() {
-    return stringToId.size();
-  }
-
-  /**
-   * Removes a string value and its ID from the pool. This should only be 
called when all blocks for
-   * a file have been evicted from the cache.
-   * @param string the file name to remove
-   * @return true if the file name was removed, false if it wasn't present
-   */
-  public boolean remove(String string) {
-    if (string == null) {
-      return false;
-    }
-    Integer id = stringToId.remove(string);
-    if (id != null) {
-      idToString.remove(id);
-      LOG.debug("Removed string value from pool: {} (ID: {})", string, id);
-      return true;
-    }
-    return false;
-  }
-
-  /**
-   * Clears all mappings from the codec.
-   */
-  public void clear() {
-    stringToId.clear();
-    idToString.clear();
-    nextId.set(0);
-    LOG.info("Cleared all file name mappings from codec");
-  }
-
-  /**
-   * Gets statistics about memory savings from string pooling.
-   * @return a formatted string with compression statistics
-   */
-  public String getPoolStats() {
-    long uniqueStrings = stringToId.size();
-    if (uniqueStrings == 0) {
-      return "No strings encoded";
-    }
-    // Calculate average string length
-    long totalChars = 
stringToId.keySet().stream().mapToLong(String::length).sum();
-    double avgLength = (double) totalChars / uniqueStrings;
-    return String.format("FilePathStringPool stats: %d unique strings, avg 
length: %.1f chars, ",
-      uniqueStrings, avgLength);
-  }
-}
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index fc42009d4f9..7ae2f3ef216 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -49,7 +49,6 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.LongAdder;
@@ -681,37 +680,6 @@ public class TestBucketCache {
     assertEquals(1, cache.getStats().getEvictionCount());
   }
 
-  @Test
-  public void testStringPool() throws Exception {
-    HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
-    Path testDir = TEST_UTIL.getDataTestDir();
-    TEST_UTIL.getTestFileSystem().mkdirs(testDir);
-    BucketCache bucketCache =
-      new BucketCache("file:" + testDir + "/bucket.cache", capacitySize, 
constructedBlockSize,
-        constructedBlockSizes, writeThreads, writerQLen, testDir + 
"/bucket.persistence");
-    assertTrue(bucketCache.waitForCacheInitialization(10000));
-    long usedSize = bucketCache.getAllocator().getUsedSize();
-    assertEquals(0, usedSize);
-    Random rand = ThreadLocalRandom.current();
-    Path filePath = new Path(testDir, Long.toString(rand.nextLong()));
-    CacheTestUtils.HFileBlockPair[] blocks =
-      CacheTestUtils.generateBlocksForPath(constructedBlockSize, 1, filePath, 
false);
-    String name = blocks[0].getBlockName().getHfileName();
-    assertEquals(name, filePath.getName());
-    assertNotNull(blocks[0].getBlockName().getRegionName());
-    bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
-    waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
-    assertTrue(FilePathStringPool.getInstance().size() > 0);
-    bucketCache.evictBlock(blocks[0].getBlockName());
-    assertTrue(FilePathStringPool.getInstance().size() > 0);
-    bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
-    waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
-    bucketCache.fileCacheCompleted(filePath,
-      bucketCache.backingMap.get(blocks[0].getBlockName()).getLength());
-    bucketCache.evictBlocksByHfileName(name);
-    assertEquals(1, FilePathStringPool.getInstance().size());
-  }
-
   @Test
   public void testCacheBlockNextBlockMetadataMissing() throws Exception {
     int size = 100;
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
deleted file mode 100644
index a42a61c93fb..00000000000
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Before;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Tests for {@link FilePathStringPool}
- */
-@Category({ SmallTests.class })
-public class TestFilePathStringPool {
-
-  @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE =
-    HBaseClassTestRule.forClass(TestFilePathStringPool.class);
-
-  private FilePathStringPool pool;
-
-  @Before
-  public void setUp() {
-    pool = FilePathStringPool.getInstance();
-    pool.clear();
-  }
-
-  @Test
-  public void testSingletonPattern() {
-    FilePathStringPool instance1 = FilePathStringPool.getInstance();
-    FilePathStringPool instance2 = FilePathStringPool.getInstance();
-    assertNotNull(instance1);
-    assertNotNull(instance2);
-    assertEquals(instance1, instance2);
-  }
-
-  @Test
-  public void testBasicEncodeDecodeRoundTrip() {
-    String testString = 
"/hbase/data/default/test-table/region1/cf1/file1.hfile";
-    int id = pool.encode(testString);
-    String decoded = pool.decode(id);
-    assertEquals(testString, decoded);
-  }
-
-  @Test
-  public void testEncodeReturnsSameIdForSameString() {
-    String testString = "/hbase/data/file.hfile";
-    int id1 = pool.encode(testString);
-    int id2 = pool.encode(testString);
-    assertEquals(id1, id2);
-    assertEquals(1, pool.size());
-  }
-
-  @Test
-  public void testEncodeDifferentStringsGetDifferentIds() {
-    String string1 = "/path/to/file1.hfile";
-    String string2 = "/path/to/file2.hfile";
-    int id1 = pool.encode(string1);
-    int id2 = pool.encode(string2);
-    assertNotEquals(id1, id2);
-    assertEquals(2, pool.size());
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testEncodeNullStringThrowsException() {
-    pool.encode(null);
-  }
-
-  @Test
-  public void testDecodeNonExistentIdReturnsNull() {
-    String decoded = pool.decode(999999);
-    assertNull(decoded);
-  }
-
-  @Test
-  public void testContainsWithId() {
-    String testString = "/hbase/file.hfile";
-    int id = pool.encode(testString);
-    assertTrue(pool.contains(id));
-    assertFalse(pool.contains(id + 1));
-  }
-
-  @Test
-  public void testContainsWithString() {
-    String testString = "/hbase/file.hfile";
-    pool.encode(testString);
-    assertTrue(pool.contains(testString));
-    assertFalse(pool.contains("/hbase/other-file.hfile"));
-  }
-
-  @Test
-  public void testRemoveExistingString() {
-    String testString = "/hbase/file.hfile";
-    int id = pool.encode(testString);
-    assertEquals(1, pool.size());
-    assertTrue(pool.contains(testString));
-    boolean removed = pool.remove(testString);
-    assertTrue(removed);
-    assertEquals(0, pool.size());
-    assertFalse(pool.contains(testString));
-    assertFalse(pool.contains(id));
-    assertNull(pool.decode(id));
-  }
-
-  @Test
-  public void testRemoveNonExistentStringReturnsFalse() {
-    boolean removed = pool.remove("/non/existent/file.hfile");
-    assertFalse(removed);
-  }
-
-  @Test
-  public void testRemoveNullStringReturnsFalse() {
-    boolean removed = pool.remove(null);
-    assertFalse(removed);
-  }
-
-  @Test
-  public void testClear() {
-    pool.encode("/file1.hfile");
-    pool.encode("/file2.hfile");
-    pool.encode("/file3.hfile");
-    assertEquals(3, pool.size());
-    pool.clear();
-    assertEquals(0, pool.size());
-  }
-
-  @Test
-  public void testSizeTracking() {
-    assertEquals(0, pool.size());
-    pool.encode("/file1.hfile");
-    assertEquals(1, pool.size());
-    pool.encode("/file2.hfile");
-    assertEquals(2, pool.size());
-    // Encoding same string should not increase size
-    pool.encode("/file1.hfile");
-    assertEquals(2, pool.size());
-    pool.remove("/file1.hfile");
-    assertEquals(1, pool.size());
-    pool.clear();
-    assertEquals(0, pool.size());
-  }
-
-  @Test
-  public void testGetPoolStats() {
-    String stats = pool.getPoolStats();
-    assertEquals("No strings encoded", stats);
-    pool.encode("/hbase/data/table1/file1.hfile");
-    pool.encode("/hbase/data/table2/file2.hfile");
-    stats = pool.getPoolStats();
-    assertNotNull(stats);
-    assertTrue(stats.contains("2 unique strings"));
-    assertTrue(stats.contains("avg length:"));
-  }
-
-  @Test
-  public void testConcurrentEncoding() throws InterruptedException {
-    int numThreads = 10;
-    int stringsPerThread = 100;
-    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-    CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    ConcurrentHashMap<String, Integer> results = new ConcurrentHashMap<>();
-    AtomicInteger errorCount = new AtomicInteger(0);
-
-    for (int t = 0; t < numThreads; t++) {
-      final int threadId = t;
-      executor.submit(() -> {
-        try {
-          for (int i = 0; i < stringsPerThread; i++) {
-            String string = "/thread" + threadId + "/file" + i + ".hfile";
-            int id = pool.encode(string);
-            results.put(string, id);
-          }
-        } catch (Exception e) {
-          errorCount.incrementAndGet();
-        } finally {
-          doneLatch.countDown();
-        }
-      });
-    }
-
-    assertTrue(doneLatch.await(30, TimeUnit.SECONDS));
-    executor.shutdown();
-
-    assertEquals(0, errorCount.get());
-    assertEquals(numThreads * stringsPerThread, pool.size());
-    assertEquals(numThreads * stringsPerThread, results.size());
-
-    // Verify all strings can be decoded correctly
-    for (Map.Entry<String, Integer> entry : results.entrySet()) {
-      String decoded = pool.decode(entry.getValue());
-      assertEquals(entry.getKey(), decoded);
-    }
-  }
-
-  @Test
-  public void testConcurrentEncodingSameStrings() throws InterruptedException {
-    int numThreads = 20;
-    String sharedString = "/shared/file.hfile";
-    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-    CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    Set<Integer> ids = ConcurrentHashMap.newKeySet();
-    AtomicInteger errorCount = new AtomicInteger(0);
-
-    for (int i = 0; i < numThreads; i++) {
-      executor.submit(() -> {
-        try {
-          int id = pool.encode(sharedString);
-          ids.add(id);
-        } catch (Exception e) {
-          errorCount.incrementAndGet();
-        } finally {
-          doneLatch.countDown();
-        }
-      });
-    }
-
-    doneLatch.await(10, TimeUnit.SECONDS);
-    executor.shutdown();
-
-    assertEquals(0, errorCount.get());
-    // All threads should get the same ID
-    assertEquals(1, ids.size());
-    assertEquals(1, pool.size());
-  }
-
-  @Test
-  public void testConcurrentRemoval() throws InterruptedException {
-    // Pre-populate with strings
-    List<String> strings = new ArrayList<>();
-    for (int i = 0; i < 100; i++) {
-      String string = "/file" + i + ".hfile";
-      strings.add(string);
-      pool.encode(string);
-    }
-    assertEquals(100, pool.size());
-
-    int numThreads = 10;
-    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-    CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    AtomicInteger successfulRemovals = new AtomicInteger(0);
-
-    for (int t = 0; t < numThreads; t++) {
-      final int threadId = t;
-      executor.submit(() -> {
-        try {
-          for (int i = threadId * 10; i < (threadId + 1) * 10; i++) {
-            if (pool.remove(strings.get(i))) {
-              successfulRemovals.incrementAndGet();
-            }
-          }
-        } catch (Exception e) {
-          // Ignore
-        } finally {
-          doneLatch.countDown();
-        }
-      });
-    }
-
-    doneLatch.await(10, TimeUnit.SECONDS);
-    executor.shutdown();
-
-    assertEquals(100, successfulRemovals.get());
-    assertEquals(0, pool.size());
-  }
-
-  @Test
-  public void testBidirectionalMappingConsistency() {
-    // Verify that both mappings stay consistent
-    List<String> strings = new ArrayList<>();
-    List<Integer> ids = new ArrayList<>();
-
-    for (int i = 0; i < 50; i++) {
-      String string = "/region" + (i % 5) + "/file" + i + ".hfile";
-      strings.add(string);
-      ids.add(pool.encode(string));
-    }
-
-    // Verify forward mapping (string -> id)
-    for (int i = 0; i < strings.size(); i++) {
-      int expectedId = ids.get(i);
-      int actualId = pool.encode(strings.get(i));
-      assertEquals(expectedId, actualId);
-    }
-
-    // Verify reverse mapping (id -> string)
-    for (int i = 0; i < ids.size(); i++) {
-      String expectedString = strings.get(i);
-      String actualString = pool.decode(ids.get(i));
-      assertEquals(expectedString, actualString);
-    }
-  }
-}


Reply via email to