diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
new file mode 100644
index 000000000000..a5f0dc17bcc1
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+
+/**
+ * A string pool like {@link String#intern()}, but more flexible, as we can create multiple
+ * instances and use them in different places, whereas {@link String#intern()} is global.
+ *
+ * We use {@link WeakReference} so that, once there are no strong references to a String, it can
+ * be GCed to reduce memory pressure.
+ *
+ * The difference from {@link WeakObjectPool} is that we also need a {@link WeakReference} as the
+ * key, not only the value, because the key (a String) is exactly what we want to deduplicate.
+ */
+@InterfaceAudience.Private
+public class FastStringPool {
+
+  private static final class WeakKey extends WeakReference<String> {
+
+ private final int hash;
+
+    WeakKey(String referent, ReferenceQueue<String> queue) {
+ super(Preconditions.checkNotNull(referent), queue);
+      // must calculate the hash here, as the referent may be GCed later
+ this.hash = referent.hashCode();
+ }
+
+ @Override
+ public int hashCode() {
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (!(obj instanceof WeakKey)) {
+ return false;
+ }
+
+ String a = this.get();
+ String b = ((WeakKey) obj).get();
+      // ConcurrentHashMap always compares references (like entry.key == key) before calling the
+      // actual equals method, so returning false here does not break cleanup of stale entries. On
+      // the normal intern path the referent is never null, so it is not a problem there either.
+ if (a == null || b == null) {
+ return false;
+ }
+ return a.equals(b);
+ }
+ }
+
+  private final ConcurrentHashMap<WeakKey, WeakReference<String>> map = new ConcurrentHashMap<>();
+
+  private final ReferenceQueue<String> refQueue = new ReferenceQueue<>();
+
+ private final Lock cleanupLock = new ReentrantLock();
+
+  // only run cleanup once every 256 intern calls
+ private static final int CLEANUP_MASK = 0xFF;
+ private final AtomicInteger counter = new AtomicInteger();
+
+ public String intern(String s) {
+ Preconditions.checkNotNull(s);
+ maybeCleanup();
+
+ WeakKey lookupKey = new WeakKey(s, null);
+    WeakReference<String> ref = map.get(lookupKey);
+ if (ref != null) {
+ String v = ref.get();
+ if (v != null) {
+ return v;
+ }
+ }
+
+ WeakKey storeKey = new WeakKey(s, refQueue);
+    WeakReference<String> storeVal = new WeakReference<>(s);
+    // Holds the value to return. The compute method returns a WeakReference, whose referent may
+    // be GCed just before we dereference it, so we capture a strong reference inside the lambda.
+    MutableObject<String> ret = new MutableObject<>();
+
+ map.compute(storeKey, (k, prevVal) -> {
+ if (prevVal == null) {
+ ret.setValue(s);
+ return storeVal;
+ } else {
+ String prevRef = prevVal.get();
+ if (prevRef != null) {
+ ret.setValue(prevRef);
+ return prevVal;
+ } else {
+ ret.setValue(s);
+ return storeVal;
+ }
+ }
+ });
+    assert ret.getValue() != null;
+    return ret.getValue();
+ }
+
+ private void cleanup() {
+ if (!cleanupLock.tryLock()) {
+ // a cleanup task is ongoing, give up
+ return;
+ }
+ try {
+ for (;;) {
+ WeakKey k = (WeakKey) refQueue.poll();
+ if (k == null) {
+ return;
+ }
+ map.remove(k);
+ }
+ } finally {
+ cleanupLock.unlock();
+ }
+ }
+
+ private void maybeCleanup() {
+ if ((counter.incrementAndGet() & CLEANUP_MASK) != 0) {
+ return;
+ }
+ cleanup();
+ }
+
+ public int size() {
+    // the size method is not on the critical path, so always run cleanup here to reduce memory
+ cleanup();
+ return map.size();
+ }
+}
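
For reviewers, a minimal usage sketch of the semantics the new class provides (not part of the patch; assumed to run inside a method body): `intern` collapses equal-but-distinct String instances into one canonical reference via a fast-path `get` plus a `compute` that handles the race where a concurrently stored referent was just GCed, and stale entries are drained from the `ReferenceQueue` at most once every 256 calls.

```java
FastStringPool pool = new FastStringPool();

// two equal but distinct String objects on the heap
String a = new String("hfile-123");
String b = new String("hfile-123");

// like String.intern(), equal values collapse to one canonical reference
String ca = pool.intern(a);
String cb = pool.intern(b);
assert ca == cb;

// unlike String.intern(), the mapping is weakly held: once nothing
// references the canonical String anymore, GC may reclaim it, and
// size() (which runs a cleanup pass first) eventually drops back to 0
a = b = ca = cb = null;
System.gc(); // only a hint; reclamation timing is up to the JVM
```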
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
new file mode 100644
index 000000000000..dd0a83393342
--- /dev/null
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Tag(SmallTests.TAG)
+public class TestFastStringPool {
+
+ private static final Logger LOG = LoggerFactory.getLogger(TestFastStringPool.class);
+
+ @Test
+ public void testMultiThread() throws InterruptedException {
+ FastStringPool pool = new FastStringPool();
+    List<String> list1 = new ArrayList<>();
+    List<String> list2 = new ArrayList<>();
+ for (int i = 0; i < 1000; i++) {
+ list1.add("list1-" + i);
+ list2.add("list2-" + i);
+ }
+    Map<String, String> interned1 = new HashMap<>();
+    Map<String, String> interned2 = new HashMap<>();
+ AtomicBoolean failed = new AtomicBoolean(false);
+ int numThreads = 10;
+    List<Thread> threads = new ArrayList<>();
+ for (int i = 0; i < numThreads; i++) {
+ threads.add(new Thread(() -> {
+ ThreadLocalRandom rand = ThreadLocalRandom.current();
+ for (int j = 0; j < 1000000; j++) {
+        List<String> list;
+        Map<String, String> interned;
+ if (rand.nextBoolean()) {
+ list = list1;
+ interned = interned1;
+ } else {
+ list = list2;
+ interned = interned2;
+ }
+ // create a new reference
+ String k = new String(list.get(rand.nextInt(list.size())));
+ String v = pool.intern(k);
+ synchronized (interned) {
+ String prev = interned.get(k);
+ if (prev != null) {
+ // should always return the same reference
+ if (prev != v) {
+ failed.set(true);
+ String msg = "reference not equal, intern failed on string " + k;
+ LOG.error(msg);
+ throw new AssertionError(msg);
+ }
+ } else {
+ interned.put(k, v);
+ }
+ }
+ }
+ }));
+ }
+ for (Thread t : threads) {
+ t.start();
+ }
+ for (Thread t : threads) {
+ t.join();
+ }
+ LOG.info("interned1 size {}, interned2 size {}, pool size {}", interned1.size(),
+ interned2.size(), pool.size());
+ assertEquals(interned1.size() + interned2.size(), pool.size());
+ interned1.clear();
+ list1.clear();
+ LOG.info("clear interned1");
+    // retry at most 30 times
+ for (int i = 0; i < 30; i++) {
+ // invoke gc manually
+ LOG.info("trigger GC");
+ System.gc();
+ Thread.sleep(1000);
+ // should have cleaned up all the references for list1
+ if (interned2.size() == pool.size()) {
+ return;
+ }
+ }
+ fail("should only have list2 strings in pool, expected pool size " + interned2.size()
+ + ", but got " + pool.size());
+ }
+}
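
The GC-dependent half of the test above relies on a common polling pattern rather than a single `System.gc()` call. A standalone sketch of the same idea (class and names hypothetical, not from the patch):

```java
import java.lang.ref.WeakReference;

public class GcWaitSketch {
  public static void main(String[] args) throws InterruptedException {
    WeakReference<Object> ref = new WeakReference<>(new Object());
    // System.gc() is only a hint, so poll instead of asserting once
    for (int i = 0; i < 30 && ref.get() != null; i++) {
      System.gc();
      Thread.sleep(100);
    }
    // very likely cleared by now, but not guaranteed by the JLS, which is
    // why the test above fails with a descriptive message only on timeout
    System.out.println("cleared: " + (ref.get() == null));
  }
}
```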
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index 2142de7053f7..254f0adbd5ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -19,8 +19,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.hfile.bucket.FilePathStringPool;
import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.FastStringPool;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.yetus.audience.InterfaceAudience;
@@ -29,17 +29,22 @@
*/
@InterfaceAudience.Private
public class BlockCacheKey implements HeapSize, java.io.Serializable {
- private static final long serialVersionUID = -5199992013113130535L; // Changed due to format
- // change
+ // Changed due to format change
+ private static final long serialVersionUID = -5199992013113130535L;
+
+ private static final FastStringPool HFILE_NAME_POOL = new FastStringPool();
+
+ private static final FastStringPool REGION_NAME_POOL = new FastStringPool();
+
+ private static final FastStringPool CF_NAME_POOL = new FastStringPool();
// New compressed format using integer file ID (when codec is available)
- private final int hfileNameId;
- private transient final FilePathStringPool stringPool;
+ private final String hfileName;
- private final int regionId;
+ private final String regionName;
- private final int cfId;
+ private final String cfName;
private final long offset;
@@ -91,11 +96,10 @@ public BlockCacheKey(String hfileName, String cfName, String regionName, long of
this.isPrimaryReplicaBlock = isPrimaryReplica;
this.offset = offset;
this.blockType = blockType;
- this.stringPool = FilePathStringPool.getInstance();
// Use string pool for file, region and cf values
- this.hfileNameId = stringPool.encode(hfileName);
- this.regionId = (regionName != null) ? stringPool.encode(regionName) : -1;
- this.cfId = (cfName != null) ? stringPool.encode(cfName) : -1;
+ this.hfileName = HFILE_NAME_POOL.intern(hfileName);
+ this.regionName = (regionName != null) ? REGION_NAME_POOL.intern(regionName) : null;
+ this.cfName = (cfName != null) ? CF_NAME_POOL.intern(cfName) : null;
this.archived = archived;
}
@@ -116,7 +120,7 @@ public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, Bloc
@Override
public int hashCode() {
- return hfileNameId * 127 + (int) (offset ^ (offset >>> 32));
+ return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
}
@Override
@@ -152,7 +156,7 @@ public long heapSize() {
* @return The file name
*/
public String getHfileName() {
- return stringPool.decode(hfileNameId);
+ return hfileName;
}
/**
@@ -160,7 +164,7 @@ public String getHfileName() {
* @return The region name
*/
public String getRegionName() {
- return stringPool.decode(regionId);
+ return regionName;
}
/**
@@ -168,7 +172,7 @@ public String getRegionName() {
* @return The column family name
*/
public String getCfName() {
- return stringPool.decode(cfId);
+ return cfName;
}
public boolean isPrimary() {
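
The memory effect this change is after: every block of one hfile gets its own BlockCacheKey, so interning lets all those keys share a single String instance per pool, instead of carrying integer IDs plus a pool decode on every getter. A hedged sketch of what the constructor now does per field (pool variable local here for illustration; in the patch the pools are static):

```java
FastStringPool hfileNamePool = new FastStringPool();

// keys for two blocks of the same hfile intern the same name...
String nameForBlockA = hfileNamePool.intern(new String("f1"));
String nameForBlockB = hfileNamePool.intern(new String("f1"));
assert nameForBlockA == nameForBlockB; // ...and share one String instance

// getHfileName() is now a plain field read, and hashCode() hashes the
// canonical String directly instead of decoding an integer ID first
long offset = 65536L;
int hash = nameForBlockA.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
```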
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index a65d0fd98954..60ba9f32cd7e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -410,7 +410,6 @@ private void startPersistenceRetriever(int[] bucketSizes, long capacity) {
LOG.warn("Can't restore from file[{}]. The bucket cache will be reset and rebuilt."
+ " Exception seen: ", persistencePath, ex);
backingMap.clear();
- FilePathStringPool.getInstance().clear();
fullyCachedFiles.clear();
backingMapValidated.set(true);
regionCachedSize.clear();
@@ -793,7 +792,6 @@ private void updateRegionCachedSize(BlockCacheKey key, long cachedSize) {
// remove the entry for that region from regionCachedSize map.
if (regionCachedSize.get(regionName) <= 0) {
regionCachedSize.remove(regionName);
- FilePathStringPool.getInstance().remove(regionName);
}
}
}
@@ -1013,7 +1011,6 @@ public void logStats() {
+ cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + ", blocksCount="
+ backingMap.size());
- LOG.info(FilePathStringPool.getInstance().getPoolStats());
cacheStats.reset();
bucketAllocator.logDebugStatistics();
@@ -1757,7 +1754,6 @@ private void persistChunkedBackingMap(FileOutputStream fos) throws IOException {
}
private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
-
// Read the first chunk that has all the details.
BucketCacheProtos.BucketCacheEntry cacheEntry =
BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
@@ -1767,7 +1763,6 @@ private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
backingMap.clear();
blocksByHFile.clear();
- FilePathStringPool.getInstance().clear();
// Read the backing map entries in batches.
int numChunks = 0;
@@ -1823,7 +1818,6 @@ private void disableCache() {
this.blocksByHFile.clear();
this.fullyCachedFiles.clear();
this.regionCachedSize.clear();
- FilePathStringPool.getInstance().clear();
}
if (cacheStats.getMetricsRollerScheduler() != null) {
cacheStats.getMetricsRollerScheduler().shutdownNow();
@@ -1852,7 +1846,6 @@ public void shutdown() {
}
}
persistToFile();
- FilePathStringPool.getInstance().clear();
} catch (IOException ex) {
LOG.error("Unable to persist data on exit: " + ex.toString(), ex);
} catch (InterruptedException e) {
@@ -1943,9 +1936,6 @@ public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long e
Set<BlockCacheKey> keySet = getAllCacheKeysForFile(hfileName, initOffset, endOffset);
// We need to make sure whether we are evicting all blocks for this given file. In case of
// split references, we might be evicting just half of the blocks
- int totalFileKeys = (endOffset == Long.MAX_VALUE)
- ? keySet.size()
- : getAllCacheKeysForFile(hfileName, 0, Long.MAX_VALUE).size();
LOG.debug("found {} blocks for file {}, starting offset: {}, end offset: {}", keySet.size(),
hfileName, initOffset, endOffset);
int numEvicted = 0;
@@ -1954,11 +1944,6 @@ public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long e
++numEvicted;
}
}
- if (numEvicted > 0) {
- if (totalFileKeys == numEvicted) {
- FilePathStringPool.getInstance().remove(hfileName);
- }
- }
return numEvicted;
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
deleted file mode 100644
index 7e08158af0eb..000000000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Pool of string values encoded to integer IDs for use in BlockCacheKey. This allows for avoiding
- * duplicating string values for file names, region and CF values on various BlockCacheKey
- * instances. Normally, single hfiles have many blocks. This means all blocks from the same file
- * will have the very same file, region and CF names. On very large BucketCache setups (i.e. file
- * based cache with TB size order), can save few GBs of memory by avoiding repeating these common
- * string values on blocks from the same file. The FilePathStringPool is implemented as a singleton,
- * since the same pool should be shared by all BlockCacheKey instances, as well as the BucketCache
- * object itself. The Id for an encoded string is an integer. Any new String added to the pool is
- * assigned the next available integer ID, starting from 0 upwards. That sets the total pool
- * capacity to Integer.MAX_VALUE. In the event of ID exhaustion (integer overflow when Id values
- * reach Integer.MAX_VALUE), the encode() method will restart iterating over int values
- * incrementally from 0 until it finds an unused ID. Strings can be removed from the pool using the
- * remove() method. BucketCache should call this when evicting all blocks for a given file (see
- * BucketCache.evictFileBlocksFromCache()).
- *
- * Thread-safe implementation that maintains bidirectional mappings between strings and IDs.
- *
- */
-@InterfaceAudience.Private
-public class FilePathStringPool {
- private static final Logger LOG = LoggerFactory.getLogger(FilePathStringPool.class);
-
- // Bidirectional mappings for string objects re-use
-  private final ConcurrentHashMap<String, Integer> stringToId = new ConcurrentHashMap<>();
-  private final ConcurrentHashMap<Integer, String> idToString = new ConcurrentHashMap<>();
- private final AtomicInteger nextId = new AtomicInteger(0);
-
- private static FilePathStringPool instance;
-
- public static FilePathStringPool getInstance() {
- synchronized (FilePathStringPool.class) {
- if (instance == null) {
- instance = new FilePathStringPool();
- }
- }
- return instance;
- }
-
- private FilePathStringPool() {
- // Private constructor for singleton
- }
-
- /**
- * Gets or creates an integer ID for the given String.
- * @param string value for the file/region/CF name.
- * @return the integer ID encoding this string in the pool.
- */
- public int encode(String string) {
- if (string == null) {
- throw new IllegalArgumentException("string cannot be null");
- }
- return stringToId.computeIfAbsent(string, name -> {
- if (stringToId.size() == Integer.MAX_VALUE) {
- throw new IllegalStateException(
- "String pool has reached maximum capacity of " + Integer.MAX_VALUE + " unique strings.");
- }
- int id = nextId.getAndIncrement();
- while (idToString.containsKey(id)) {
- id = nextId.getAndIncrement();
- if (id == Integer.MAX_VALUE) {
- nextId.set(0);
- LOG.info("Id values reached Integer.MAX_VALUE, restarting from 0");
- }
- }
- idToString.put(id, name);
- LOG.trace("Encoded new string to ID {}: {}", id, name);
- return id;
- });
- }
-
- /**
- * Decodes an integer ID back to its original file name.
- * @param id the integer ID
- * @return the original file name, or null if not found
- */
- public String decode(int id) {
- return idToString.get(id);
- }
-
- /**
- * Checks if a given string ID is already being used.
- * @param id the integer ID to check
- * @return true if the ID exists
- */
- public boolean contains(int id) {
- return idToString.containsKey(id);
- }
-
- /**
- * Checks if a given string has been encoded.
- * @param string the value to check
- * @return true if the string value has been encoded
- */
- public boolean contains(String string) {
- return stringToId.containsKey(string);
- }
-
- /**
- * Gets the number of unique file names currently tracked.
- * @return the number of entries in the codec
- */
- public int size() {
- return stringToId.size();
- }
-
- /**
- * Removes a string value and its ID from the pool. This should only be called when all blocks for
- * a file have been evicted from the cache.
- * @param string the file name to remove
- * @return true if the file name was removed, false if it wasn't present
- */
- public boolean remove(String string) {
- if (string == null) {
- return false;
- }
- Integer id = stringToId.remove(string);
- if (id != null) {
- idToString.remove(id);
- LOG.debug("Removed string value from pool: {} (ID: {})", string, id);
- return true;
- }
- return false;
- }
-
- /**
- * Clears all mappings from the codec.
- */
- public void clear() {
- stringToId.clear();
- idToString.clear();
- nextId.set(0);
- LOG.info("Cleared all file name mappings from codec");
- }
-
- /**
- * Gets statistics about memory savings from string pooling.
- * @return a formatted string with compression statistics
- */
- public String getPoolStats() {
- long uniqueStrings = stringToId.size();
- if (uniqueStrings == 0) {
- return "No strings encoded";
- }
- // Calculate average string length
- long totalChars = stringToId.keySet().stream().mapToLong(String::length).sum();
- double avgLength = (double) totalChars / uniqueStrings;
- return String.format("FilePathStringPool stats: %d unique strings, avg length: %.1f chars, ",
- uniqueStrings, avgLength);
- }
-}
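
For contrast with the replacement, a minimal sketch of the lifecycle the deleted singleton imposed on callers, which is what all the removed BucketCache bookkeeping above was for (path made up for illustration):

```java
// every caller shared one process-wide pool...
FilePathStringPool pool = FilePathStringPool.getInstance();

// ...stored an int per field and paid a map lookup on every getter...
int id = pool.encode("/hbase/data/ns/table/region/cf/file1");
String name = pool.decode(id);

// ...and had to free entries by hand (the remove()/clear() calls this
// patch deletes from BucketCache), or the mapping lived for the life of
// the process even after all blocks for the file were evicted
pool.remove("/hbase/data/ns/table/region/cf/file1");
```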
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index fc42009d4f9b..7ae2f3ef2160 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -49,7 +49,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.LongAdder;
@@ -681,37 +680,6 @@ public void testEvictionCount() throws InterruptedException {
assertEquals(1, cache.getStats().getEvictionCount());
}
- @Test
- public void testStringPool() throws Exception {
- HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
- Path testDir = TEST_UTIL.getDataTestDir();
- TEST_UTIL.getTestFileSystem().mkdirs(testDir);
- BucketCache bucketCache =
- new BucketCache("file:" + testDir + "/bucket.cache", capacitySize, constructedBlockSize,
- constructedBlockSizes, writeThreads, writerQLen, testDir + "/bucket.persistence");
- assertTrue(bucketCache.waitForCacheInitialization(10000));
- long usedSize = bucketCache.getAllocator().getUsedSize();
- assertEquals(0, usedSize);
- Random rand = ThreadLocalRandom.current();
- Path filePath = new Path(testDir, Long.toString(rand.nextLong()));
- CacheTestUtils.HFileBlockPair[] blocks =
- CacheTestUtils.generateBlocksForPath(constructedBlockSize, 1, filePath, false);
- String name = blocks[0].getBlockName().getHfileName();
- assertEquals(name, filePath.getName());
- assertNotNull(blocks[0].getBlockName().getRegionName());
- bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
- waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
- assertTrue(FilePathStringPool.getInstance().size() > 0);
- bucketCache.evictBlock(blocks[0].getBlockName());
- assertTrue(FilePathStringPool.getInstance().size() > 0);
- bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
- waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
- bucketCache.fileCacheCompleted(filePath,
- bucketCache.backingMap.get(blocks[0].getBlockName()).getLength());
- bucketCache.evictBlocksByHfileName(name);
- assertEquals(1, FilePathStringPool.getInstance().size());
- }
-
@Test
public void testCacheBlockNextBlockMetadataMissing() throws Exception {
int size = 100;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
deleted file mode 100644
index a42a61c93fbd..000000000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Before;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Tests for {@link FilePathStringPool}
- */
-@Category({ SmallTests.class })
-public class TestFilePathStringPool {
-
- @ClassRule
- public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestFilePathStringPool.class);
-
- private FilePathStringPool pool;
-
- @Before
- public void setUp() {
- pool = FilePathStringPool.getInstance();
- pool.clear();
- }
-
- @Test
- public void testSingletonPattern() {
- FilePathStringPool instance1 = FilePathStringPool.getInstance();
- FilePathStringPool instance2 = FilePathStringPool.getInstance();
- assertNotNull(instance1);
- assertNotNull(instance2);
- assertEquals(instance1, instance2);
- }
-
- @Test
- public void testBasicEncodeDecodeRoundTrip() {
- String testString = "/hbase/data/default/test-table/region1/cf1/file1.hfile";
- int id = pool.encode(testString);
- String decoded = pool.decode(id);
- assertEquals(testString, decoded);
- }
-
- @Test
- public void testEncodeReturnsSameIdForSameString() {
- String testString = "/hbase/data/file.hfile";
- int id1 = pool.encode(testString);
- int id2 = pool.encode(testString);
- assertEquals(id1, id2);
- assertEquals(1, pool.size());
- }
-
- @Test
- public void testEncodeDifferentStringsGetDifferentIds() {
- String string1 = "/path/to/file1.hfile";
- String string2 = "/path/to/file2.hfile";
- int id1 = pool.encode(string1);
- int id2 = pool.encode(string2);
- assertNotEquals(id1, id2);
- assertEquals(2, pool.size());
- }
-
- @Test(expected = IllegalArgumentException.class)
- public void testEncodeNullStringThrowsException() {
- pool.encode(null);
- }
-
- @Test
- public void testDecodeNonExistentIdReturnsNull() {
- String decoded = pool.decode(999999);
- assertNull(decoded);
- }
-
- @Test
- public void testContainsWithId() {
- String testString = "/hbase/file.hfile";
- int id = pool.encode(testString);
- assertTrue(pool.contains(id));
- assertFalse(pool.contains(id + 1));
- }
-
- @Test
- public void testContainsWithString() {
- String testString = "/hbase/file.hfile";
- pool.encode(testString);
- assertTrue(pool.contains(testString));
- assertFalse(pool.contains("/hbase/other-file.hfile"));
- }
-
- @Test
- public void testRemoveExistingString() {
- String testString = "/hbase/file.hfile";
- int id = pool.encode(testString);
- assertEquals(1, pool.size());
- assertTrue(pool.contains(testString));
- boolean removed = pool.remove(testString);
- assertTrue(removed);
- assertEquals(0, pool.size());
- assertFalse(pool.contains(testString));
- assertFalse(pool.contains(id));
- assertNull(pool.decode(id));
- }
-
- @Test
- public void testRemoveNonExistentStringReturnsFalse() {
- boolean removed = pool.remove("/non/existent/file.hfile");
- assertFalse(removed);
- }
-
- @Test
- public void testRemoveNullStringReturnsFalse() {
- boolean removed = pool.remove(null);
- assertFalse(removed);
- }
-
- @Test
- public void testClear() {
- pool.encode("/file1.hfile");
- pool.encode("/file2.hfile");
- pool.encode("/file3.hfile");
- assertEquals(3, pool.size());
- pool.clear();
- assertEquals(0, pool.size());
- }
-
- @Test
- public void testSizeTracking() {
- assertEquals(0, pool.size());
- pool.encode("/file1.hfile");
- assertEquals(1, pool.size());
- pool.encode("/file2.hfile");
- assertEquals(2, pool.size());
- // Encoding same string should not increase size
- pool.encode("/file1.hfile");
- assertEquals(2, pool.size());
- pool.remove("/file1.hfile");
- assertEquals(1, pool.size());
- pool.clear();
- assertEquals(0, pool.size());
- }
-
- @Test
- public void testGetPoolStats() {
- String stats = pool.getPoolStats();
- assertEquals("No strings encoded", stats);
- pool.encode("/hbase/data/table1/file1.hfile");
- pool.encode("/hbase/data/table2/file2.hfile");
- stats = pool.getPoolStats();
- assertNotNull(stats);
- assertTrue(stats.contains("2 unique strings"));
- assertTrue(stats.contains("avg length:"));
- }
-
- @Test
- public void testConcurrentEncoding() throws InterruptedException {
- int numThreads = 10;
- int stringsPerThread = 100;
- ExecutorService executor = Executors.newFixedThreadPool(numThreads);
- CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    ConcurrentHashMap<String, Integer> results = new ConcurrentHashMap<>();
- AtomicInteger errorCount = new AtomicInteger(0);
-
- for (int t = 0; t < numThreads; t++) {
- final int threadId = t;
- executor.submit(() -> {
- try {
- for (int i = 0; i < stringsPerThread; i++) {
- String string = "/thread" + threadId + "/file" + i + ".hfile";
- int id = pool.encode(string);
- results.put(string, id);
- }
- } catch (Exception e) {
- errorCount.incrementAndGet();
- } finally {
- doneLatch.countDown();
- }
- });
- }
-
- assertTrue(doneLatch.await(30, TimeUnit.SECONDS));
- executor.shutdown();
-
- assertEquals(0, errorCount.get());
- assertEquals(numThreads * stringsPerThread, pool.size());
- assertEquals(numThreads * stringsPerThread, results.size());
-
- // Verify all strings can be decoded correctly
-    for (Map.Entry<String, Integer> entry : results.entrySet()) {
- String decoded = pool.decode(entry.getValue());
- assertEquals(entry.getKey(), decoded);
- }
- }
-
- @Test
- public void testConcurrentEncodingSameStrings() throws InterruptedException {
- int numThreads = 20;
- String sharedString = "/shared/file.hfile";
- ExecutorService executor = Executors.newFixedThreadPool(numThreads);
- CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    Set<Integer> ids = ConcurrentHashMap.newKeySet();
- AtomicInteger errorCount = new AtomicInteger(0);
-
- for (int i = 0; i < numThreads; i++) {
- executor.submit(() -> {
- try {
- int id = pool.encode(sharedString);
- ids.add(id);
- } catch (Exception e) {
- errorCount.incrementAndGet();
- } finally {
- doneLatch.countDown();
- }
- });
- }
-
- doneLatch.await(10, TimeUnit.SECONDS);
- executor.shutdown();
-
- assertEquals(0, errorCount.get());
- // All threads should get the same ID
- assertEquals(1, ids.size());
- assertEquals(1, pool.size());
- }
-
- @Test
- public void testConcurrentRemoval() throws InterruptedException {
- // Pre-populate with strings
-    List<String> strings = new ArrayList<>();
- for (int i = 0; i < 100; i++) {
- String string = "/file" + i + ".hfile";
- strings.add(string);
- pool.encode(string);
- }
- assertEquals(100, pool.size());
-
- int numThreads = 10;
- ExecutorService executor = Executors.newFixedThreadPool(numThreads);
- CountDownLatch doneLatch = new CountDownLatch(numThreads);
- AtomicInteger successfulRemovals = new AtomicInteger(0);
-
- for (int t = 0; t < numThreads; t++) {
- final int threadId = t;
- executor.submit(() -> {
- try {
- for (int i = threadId * 10; i < (threadId + 1) * 10; i++) {
- if (pool.remove(strings.get(i))) {
- successfulRemovals.incrementAndGet();
- }
- }
- } catch (Exception e) {
- // Ignore
- } finally {
- doneLatch.countDown();
- }
- });
- }
-
- doneLatch.await(10, TimeUnit.SECONDS);
- executor.shutdown();
-
- assertEquals(100, successfulRemovals.get());
- assertEquals(0, pool.size());
- }
-
- @Test
- public void testBidirectionalMappingConsistency() {
- // Verify that both mappings stay consistent
-    List<String> strings = new ArrayList<>();
-    List<Integer> ids = new ArrayList<>();
-
- for (int i = 0; i < 50; i++) {
- String string = "/region" + (i % 5) + "/file" + i + ".hfile";
- strings.add(string);
- ids.add(pool.encode(string));
- }
-
- // Verify forward mapping (string -> id)
- for (int i = 0; i < strings.size(); i++) {
- int expectedId = ids.get(i);
- int actualId = pool.encode(strings.get(i));
- assertEquals(expectedId, actualId);
- }
-
- // Verify reverse mapping (id -> string)
- for (int i = 0; i < ids.size(); i++) {
- String expectedString = strings.get(i);
- String actualString = pool.decode(ids.get(i));
- assertEquals(expectedString, actualString);
- }
- }
-}