diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
new file mode 100644
index 000000000000..a5f0dc17bcc1
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/FastStringPool.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+
+/**
+ * A string pool like {@link String#intern()}, but more flexible, as we can create multiple
+ * instances and use them in different places, whereas {@link String#intern()} is global.
+ * <p/>
+ * We use {@link WeakReference} so that when there are no strong references to a String, it can be
+ * GCed to reduce memory pressure.
+ * <p/>
+ * The difference from {@link WeakObjectPool} is that we also need to use {@link WeakReference} as
+ * the key, not only the value, because the key (a String) is exactly what we want to deduplicate.
+ */
+@InterfaceAudience.Private
+public class FastStringPool {
+
+  private static final class WeakKey extends WeakReference<String> {
+
+    private final int hash;
+
+    WeakKey(String referent, ReferenceQueue<String> queue) {
+      super(Preconditions.checkNotNull(referent), queue);
+      // must calculate it here, as later the referent may be GCed
+      this.hash = referent.hashCode();
+    }
+
+    @Override
+    public int hashCode() {
+      return hash;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (!(obj instanceof WeakKey)) {
+        return false;
+      }
+      String a = this.get();
+      String b = ((WeakKey) obj).get();
+      // In ConcurrentHashMap, we will always compare references (like entry.key == key) before
+      // calling the actual equals method, so this will not cause problems for cleanup. And on the
+      // normal intern path, the reference will never be null, so there is no problem there either.
+      if (a == null || b == null) {
+        return false;
+      }
+      return a.equals(b);
+    }
+  }
+
+  private final ConcurrentHashMap<WeakKey, WeakReference<String>> map = new ConcurrentHashMap<>();
+
+  private final ReferenceQueue<String> refQueue = new ReferenceQueue<>();
+
+  private final Lock cleanupLock = new ReentrantLock();
+
+  // only run cleanup once every 256 calls
+  private static final int CLEANUP_MASK = 0xFF;
+  private final AtomicInteger counter = new AtomicInteger();
+
+  public String intern(String s) {
+    Preconditions.checkNotNull(s);
+    maybeCleanup();
+
+    WeakKey lookupKey = new WeakKey(s, null);
+    WeakReference<String> ref = map.get(lookupKey);
+    if (ref != null) {
+      String v = ref.get();
+      if (v != null) {
+        return v;
+      }
+    }
+
+    WeakKey storeKey = new WeakKey(s, refQueue);
+    WeakReference<String> storeVal = new WeakReference<>(s);
+    // Used to store the return value. The compute method returns a WeakReference, and the referent
+    // of that WeakReference may be GCed just before we dereference it for returning, so we record
+    // the strong reference inside the lambda instead.
+    MutableObject<String> ret = new MutableObject<>();
+    map.compute(storeKey, (k, prevVal) -> {
+      if (prevVal == null) {
+        ret.setValue(s);
+        return storeVal;
+      } else {
+        String prevRef = prevVal.get();
+        if (prevRef != null) {
+          ret.setValue(prevRef);
+          return prevVal;
+        } else {
+          ret.setValue(s);
+          return storeVal;
+        }
+      }
+    });
+    assert ret.getValue() != null;
+    return ret.getValue();
+  }
+
+  private void cleanup() {
+    if (!cleanupLock.tryLock()) {
+      // a cleanup task is ongoing, give up
+      return;
+    }
+    try {
+      for (;;) {
+        WeakKey k = (WeakKey) refQueue.poll();
+        if (k == null) {
+          return;
+        }
+        map.remove(k);
+      }
+    } finally {
+      cleanupLock.unlock();
+    }
+  }
+
+  private void maybeCleanup() {
+    if ((counter.incrementAndGet() & CLEANUP_MASK) != 0) {
+      return;
+    }
+    cleanup();
+  }
+
+  public int size() {
+    // the size method is not on the critical path, so always run a cleanup here to reduce memory
+    // pressure
+    cleanup();
+    return map.size();
+  }
+}
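[Editor's note] Before the call-site changes below, here is a minimal usage sketch of the contract FastStringPool provides. It is illustrative only, not part of the patch: equal strings interned through the same pool collapse to one canonical reference, while each pool instance deduplicates independently, unlike the JVM-global String.intern().

    import org.apache.hadoop.hbase.util.FastStringPool;

    public class FastStringPoolExample {
      public static void main(String[] args) {
        FastStringPool pool = new FastStringPool();

        // Two distinct String objects with equal contents.
        String a = new String("f1234/cf/hfile");
        String b = new String("f1234/cf/hfile");

        String ia = pool.intern(a);
        String ib = pool.intern(b);
        // Both call sites now share one canonical object.
        System.out.println(ia == ib); // true

        // A second pool has never seen this value, so it canonicalizes
        // to its own (different) reference: pools are independent.
        FastStringPool other = new FastStringPool();
        System.out.println(other.intern(new String("f1234/cf/hfile")) == ia); // false
      }
    }

Because the pool only holds weak references, the canonical string survives exactly as long as some caller keeps it strongly reachable; after that, the periodic cleanup drains the ReferenceQueue and drops the stale entry.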
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
new file mode 100644
index 000000000000..dd0a83393342
--- /dev/null
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestFastStringPool.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Tag(SmallTests.TAG)
+public class TestFastStringPool {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestFastStringPool.class);
+
+  @Test
+  public void testMultiThread() throws InterruptedException {
+    FastStringPool pool = new FastStringPool();
+    List<String> list1 = new ArrayList<>();
+    List<String> list2 = new ArrayList<>();
+    for (int i = 0; i < 1000; i++) {
+      list1.add("list1-" + i);
+      list2.add("list2-" + i);
+    }
+    Map<String, String> interned1 = new HashMap<>();
+    Map<String, String> interned2 = new HashMap<>();
+    AtomicBoolean failed = new AtomicBoolean(false);
+    int numThreads = 10;
+    List<Thread> threads = new ArrayList<>();
+    for (int i = 0; i < numThreads; i++) {
+      threads.add(new Thread(() -> {
+        ThreadLocalRandom rand = ThreadLocalRandom.current();
+        for (int j = 0; j < 1000000; j++) {
+          List<String> list;
+          Map<String, String> interned;
+          if (rand.nextBoolean()) {
+            list = list1;
+            interned = interned1;
+          } else {
+            list = list2;
+            interned = interned2;
+          }
+          // create a new reference
+          String k = new String(list.get(rand.nextInt(list.size())));
+          String v = pool.intern(k);
+          synchronized (interned) {
+            String prev = interned.get(k);
+            if (prev != null) {
+              // should always return the same reference
+              if (prev != v) {
+                failed.set(true);
+                String msg = "reference not equal, intern failed on string " + k;
+                LOG.error(msg);
+                throw new AssertionError(msg);
+              }
+            } else {
+              interned.put(k, v);
+            }
+          }
+        }
+      }));
+    }
+    for (Thread t : threads) {
+      t.start();
+    }
+    for (Thread t : threads) {
+      t.join();
+    }
+    // An AssertionError thrown inside a worker thread does not fail the test on its own,
+    // so check the flag after joining all threads.
+    assertFalse(failed.get(), "intern returned different references for equal strings");
+    LOG.info("interned1 size {}, interned2 size {}, pool size {}", interned1.size(),
+      interned2.size(), pool.size());
+    assertEquals(interned1.size() + interned2.size(), pool.size());
+    interned1.clear();
+    list1.clear();
+    LOG.info("clear interned1");
+    // retry at most 30 times
+    for (int i = 0; i < 30; i++) {
+      // invoke gc manually
+      LOG.info("trigger GC");
+      System.gc();
+      Thread.sleep(1000);
+      // should have cleaned up all the references for list1
+      if (interned2.size() == pool.size()) {
+        return;
+      }
+    }
+    fail("should only have list2 strings in pool, expected pool size " + interned2.size()
+      + ", but got " + pool.size());
+  }
+}
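[Editor's note] The GC-dependent assertion at the end of the test can look flaky at first glance, so a note on the pattern: System.gc() is only a hint to the JVM, which is why the test retries up to 30 times before failing. A standalone sketch of the same mechanism, assuming typical HotSpot behavior and not taken from the patch:

    import java.lang.ref.WeakReference;

    public class WeakClearDemo {
      public static void main(String[] args) throws InterruptedException {
        String s = new String("transient"); // heap copy, not the interned literal
        WeakReference<String> ref = new WeakReference<>(s);
        s = null; // drop the only strong reference
        for (int i = 0; i < 30 && ref.get() != null; i++) {
          System.gc(); // a hint only; hence the retry loop, as in the test above
          Thread.sleep(100);
        }
        System.out.println("cleared: " + (ref.get() == null));
      }
    }

Once the weak reference is cleared, the pool's enqueued WeakKey is drained from its ReferenceQueue by cleanup(), which is what lets pool.size() shrink back to the list2 entries.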
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index 2142de7053f7..254f0adbd5ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -19,8 +19,8 @@
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.hfile.bucket.FilePathStringPool;
 import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.FastStringPool;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -29,17 +29,22 @@
  */
 @InterfaceAudience.Private
 public class BlockCacheKey implements HeapSize, java.io.Serializable {
-  private static final long serialVersionUID = -5199992013113130535L; // Changed due to format
-  // change
+  // Changed due to format change
+  private static final long serialVersionUID = -5199992013113130535L;
+
+  private static final FastStringPool HFILE_NAME_POOL = new FastStringPool();
+
+  private static final FastStringPool REGION_NAME_POOL = new FastStringPool();
+
+  private static final FastStringPool CF_NAME_POOL = new FastStringPool();
 
-  // New compressed format using integer file ID (when codec is available)
-  private final int hfileNameId;
-  private transient final FilePathStringPool stringPool;
+  private final String hfileName;
 
-  private final int regionId;
+  private final String regionName;
 
-  private final int cfId;
+  private final String cfName;
 
   private final long offset;
 
@@ -91,11 +96,10 @@ public BlockCacheKey(String hfileName, String cfName, String regionName, long of
     this.isPrimaryReplicaBlock = isPrimaryReplica;
     this.offset = offset;
     this.blockType = blockType;
-    this.stringPool = FilePathStringPool.getInstance();
     // Use string pool for file, region and cf values
-    this.hfileNameId = stringPool.encode(hfileName);
-    this.regionId = (regionName != null) ? stringPool.encode(regionName) : -1;
-    this.cfId = (cfName != null) ? stringPool.encode(cfName) : -1;
+    this.hfileName = HFILE_NAME_POOL.intern(hfileName);
+    this.regionName = (regionName != null) ? REGION_NAME_POOL.intern(regionName) : null;
+    this.cfName = (cfName != null) ? CF_NAME_POOL.intern(cfName) : null;
     this.archived = archived;
   }
 
@@ -116,7 +120,7 @@ public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, Bloc
 
   @Override
   public int hashCode() {
-    return hfileNameId * 127 + (int) (offset ^ (offset >>> 32));
+    return hfileName.hashCode() * 127 + (int) (offset ^ (offset >>> 32));
   }
 
   @Override
@@ -152,7 +156,7 @@ public long heapSize() {
    * @return The file name
    */
   public String getHfileName() {
-    return stringPool.decode(hfileNameId);
+    return hfileName;
   }
 
   /**
@@ -160,7 +164,7 @@ public String getHfileName() {
    * @return The region name
    */
   public String getRegionName() {
-    return stringPool.decode(regionId);
+    return regionName;
   }
 
   /**
@@ -168,7 +172,7 @@ public String getRegionName() {
    * @return The column family name
   */
   public String getCfName() {
-    return stringPool.decode(cfId);
+    return cfName;
   }
 
   public boolean isPrimary() {
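[Editor's note] To make the motivation concrete: every cached block gets its own BlockCacheKey, and all blocks of one HFile share the same file, region, and CF names, so without some form of pooling each key would carry its own copies of the same strings. A rough, illustrative calculation follows; the block and byte counts are assumptions for the sketch, not measurements from the patch:

    public class InterningSavings {
      public static void main(String[] args) {
        int blocksPerFile = 10_000; // assumed blocks cached per HFile
        int files = 1_000;          // assumed HFiles in a large bucket cache
        // Rough per-String footprint for a ~50-char path on a 64-bit JVM with
        // compressed oops: object headers plus char storage, ~120 bytes (assumption).
        long perString = 120;
        long withoutPool = (long) blocksPerFile * files * perString;
        long withPool = (long) files * perString; // one canonical copy per file name
        System.out.printf("without pool: ~%d MB, with pool: ~%d KB%n",
          withoutPool >> 20, withPool >> 10);
      }
    }

The deleted FilePathStringPool attacked the same duplication with int IDs; the change here keeps the deduplication but drops the ID indirection, so hashCode() and the getters above become plain field access.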
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index a65d0fd98954..60ba9f32cd7e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -410,7 +410,6 @@ private void startPersistenceRetriever(int[] bucketSizes, long capacity) {
         LOG.warn("Can't restore from file[{}]. The bucket cache will be reset and rebuilt."
           + " Exception seen: ", persistencePath, ex);
         backingMap.clear();
-        FilePathStringPool.getInstance().clear();
         fullyCachedFiles.clear();
         backingMapValidated.set(true);
         regionCachedSize.clear();
@@ -793,7 +792,6 @@ private void updateRegionCachedSize(BlockCacheKey key, long cachedSize) {
       // remove the entry for that region from regionCachedSize map.
       if (regionCachedSize.get(regionName) <= 0) {
         regionCachedSize.remove(regionName);
-        FilePathStringPool.getInstance().remove(regionName);
       }
     }
   }
@@ -1013,7 +1011,6 @@ public void logStats() {
       + cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
       + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + ", blocksCount="
       + backingMap.size());
-    LOG.info(FilePathStringPool.getInstance().getPoolStats());
     cacheStats.reset();
 
     bucketAllocator.logDebugStatistics();
@@ -1757,7 +1754,6 @@ private void persistChunkedBackingMap(FileOutputStream fos) throws IOException {
   }
 
   private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
-
     // Read the first chunk that has all the details.
     BucketCacheProtos.BucketCacheEntry cacheEntry =
       BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
@@ -1767,7 +1763,6 @@ private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
 
     backingMap.clear();
     blocksByHFile.clear();
-    FilePathStringPool.getInstance().clear();
 
     // Read the backing map entries in batches.
     int numChunks = 0;
@@ -1823,7 +1818,6 @@ private void disableCache() {
       this.blocksByHFile.clear();
       this.fullyCachedFiles.clear();
      this.regionCachedSize.clear();
-      FilePathStringPool.getInstance().clear();
     }
     if (cacheStats.getMetricsRollerScheduler() != null) {
       cacheStats.getMetricsRollerScheduler().shutdownNow();
@@ -1852,7 +1846,6 @@ public void shutdown() {
         }
       }
       persistToFile();
-      FilePathStringPool.getInstance().clear();
     } catch (IOException ex) {
       LOG.error("Unable to persist data on exit: " + ex.toString(), ex);
     } catch (InterruptedException e) {
@@ -1943,9 +1936,6 @@ public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long e
     Set<BlockCacheKey> keySet = getAllCacheKeysForFile(hfileName, initOffset, endOffset);
     // We need to make sure whether we are evicting all blocks for this given file. In case of
     // split references, we might be evicting just half of the blocks
-    int totalFileKeys = (endOffset == Long.MAX_VALUE)
-      ? keySet.size()
-      : getAllCacheKeysForFile(hfileName, 0, Long.MAX_VALUE).size();
     LOG.debug("found {} blocks for file {}, starting offset: {}, end offset: {}", keySet.size(),
       hfileName, initOffset, endOffset);
     int numEvicted = 0;
@@ -1954,11 +1944,6 @@ public int evictBlocksRangeByHfileName(String hfileName, long initOffset, long e
         ++numEvicted;
       }
     }
-    if (numEvicted > 0) {
-      if (totalFileKeys == numEvicted) {
-        FilePathStringPool.getInstance().remove(hfileName);
-      }
-    }
     return numEvicted;
   }
 
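[Editor's note] Every BucketCache change above is pure deletion: the explicit clear()/remove() bookkeeping becomes unnecessary because FastStringPool reclaims entries itself, amortizing the cost by running cleanup only on every 256th call via the counter-and-mask idiom. A self-contained sketch of that idiom, illustrative rather than code from the patch:

    import java.util.concurrent.atomic.AtomicInteger;

    public class AmortizedCleanup {
      private static final int CLEANUP_MASK = 0xFF;
      private final AtomicInteger counter = new AtomicInteger();

      void onOperation() {
        // Only the caller that wraps the low 8 bits to zero pays for cleanup.
        if ((counter.incrementAndGet() & CLEANUP_MASK) == 0) {
          cleanup(); // runs roughly once per 256 operations
        }
      }

      private void cleanup() {
        System.out.println("cleanup pass at count " + counter.get());
      }

      public static void main(String[] args) {
        AmortizedCleanup c = new AmortizedCleanup();
        for (int i = 0; i < 1024; i++) {
          c.onOperation(); // prints 4 times: at 256, 512, 768, 1024
        }
      }
    }

This keeps the common intern() path at one atomic increment, while a tryLock in the real cleanup() ensures at most one thread drains the reference queue at a time.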
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
deleted file mode 100644
index 7e08158af0eb..000000000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FilePathStringPool.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Pool of string values encoded to integer IDs for use in BlockCacheKey. This allows for avoiding
- * duplicating string values for file names, region and CF values on various BlockCacheKey
- * instances. Normally, single hfiles have many blocks. This means all blocks from the same file
- * will have the very same file, region and CF names. On very large BucketCache setups (i.e. file
- * based cache with TB size order), can save few GBs of memory by avoiding repeating these common
- * string values on blocks from the same file. The FilePathStringPool is implemented as a
- * singleton, since the same pool should be shared by all BlockCacheKey instances, as well as the
- * BucketCache object itself. The Id for an encoded string is an integer. Any new String added to
- * the pool is assigned the next available integer ID, starting from 0 upwards. That sets the
- * total pool capacity to Integer.MAX_VALUE. In the event of ID exhaustion (integer overflow when
- * Id values reach Integer.MAX_VALUE), the encode() method will restart iterating over int values
- * incrementally from 0 until it finds an unused ID. Strings can be removed from the pool using
- * the remove() method. BucketCache should call this when evicting all blocks for a given file
- * (see BucketCache.evictFileBlocksFromCache()).
- * <p>
- * Thread-safe implementation that maintains bidirectional mappings between strings and IDs.
- * <p>
- */
-@InterfaceAudience.Private
-public class FilePathStringPool {
-  private static final Logger LOG = LoggerFactory.getLogger(FilePathStringPool.class);
-
-  // Bidirectional mappings for string objects re-use
-  private final ConcurrentHashMap<String, Integer> stringToId = new ConcurrentHashMap<>();
-  private final ConcurrentHashMap<Integer, String> idToString = new ConcurrentHashMap<>();
-  private final AtomicInteger nextId = new AtomicInteger(0);
-
-  private static FilePathStringPool instance;
-
-  public static FilePathStringPool getInstance() {
-    synchronized (FilePathStringPool.class) {
-      if (instance == null) {
-        instance = new FilePathStringPool();
-      }
-    }
-    return instance;
-  }
-
-  private FilePathStringPool() {
-    // Private constructor for singleton
-  }
-
-  /**
-   * Gets or creates an integer ID for the given String.
-   * @param string value for the file/region/CF name.
-   * @return the integer ID encoding this string in the pool.
-   */
-  public int encode(String string) {
-    if (string == null) {
-      throw new IllegalArgumentException("string cannot be null");
-    }
-    return stringToId.computeIfAbsent(string, name -> {
-      if (stringToId.size() == Integer.MAX_VALUE) {
-        throw new IllegalStateException(
-          "String pool has reached maximum capacity of " + Integer.MAX_VALUE + " unique strings.");
-      }
-      int id = nextId.getAndIncrement();
-      while (idToString.containsKey(id)) {
-        id = nextId.getAndIncrement();
-        if (id == Integer.MAX_VALUE) {
-          nextId.set(0);
-          LOG.info("Id values reached Integer.MAX_VALUE, restarting from 0");
-        }
-      }
-      idToString.put(id, name);
-      LOG.trace("Encoded new string to ID {}: {}", id, name);
-      return id;
-    });
-  }
-
-  /**
-   * Decodes an integer ID back to its original file name.
-   * @param id the integer ID
-   * @return the original file name, or null if not found
-   */
-  public String decode(int id) {
-    return idToString.get(id);
-  }
-
-  /**
-   * Checks if a given string ID is already being used.
-   * @param id the integer ID to check
-   * @return true if the ID exists
-   */
-  public boolean contains(int id) {
-    return idToString.containsKey(id);
-  }
-
-  /**
-   * Checks if a given string has been encoded.
-   * @param string the value to check
-   * @return true if the string value has been encoded
-   */
-  public boolean contains(String string) {
-    return stringToId.containsKey(string);
-  }
-
-  /**
-   * Gets the number of unique file names currently tracked.
-   * @return the number of entries in the codec
-   */
-  public int size() {
-    return stringToId.size();
-  }
-
-  /**
-   * Removes a string value and its ID from the pool. This should only be called when all blocks
-   * for a file have been evicted from the cache.
-   * @param string the file name to remove
-   * @return true if the file name was removed, false if it wasn't present
-   */
-  public boolean remove(String string) {
-    if (string == null) {
-      return false;
-    }
-    Integer id = stringToId.remove(string);
-    if (id != null) {
-      idToString.remove(id);
-      LOG.debug("Removed string value from pool: {} (ID: {})", string, id);
-      return true;
-    }
-    return false;
-  }
-
-  /**
-   * Clears all mappings from the codec.
-   */
-  public void clear() {
-    stringToId.clear();
-    idToString.clear();
-    nextId.set(0);
-    LOG.info("Cleared all file name mappings from codec");
-  }
-
-  /**
-   * Gets statistics about memory savings from string pooling.
-   * @return a formatted string with compression statistics
-   */
-  public String getPoolStats() {
-    long uniqueStrings = stringToId.size();
-    if (uniqueStrings == 0) {
-      return "No strings encoded";
-    }
-    // Calculate average string length
-    long totalChars = stringToId.keySet().stream().mapToLong(String::length).sum();
-    double avgLength = (double) totalChars / uniqueStrings;
-    return String.format("FilePathStringPool stats: %d unique strings, avg length: %.1f chars, ",
-      uniqueStrings, avgLength);
-  }
-}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index fc42009d4f9b..7ae2f3ef2160 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -49,7 +49,6 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.concurrent.atomic.LongAdder;
@@ -681,37 +680,6 @@ public void testEvictionCount() throws InterruptedException {
     assertEquals(1, cache.getStats().getEvictionCount());
   }
 
-  @Test
-  public void testStringPool() throws Exception {
-    HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
-    Path testDir = TEST_UTIL.getDataTestDir();
-    TEST_UTIL.getTestFileSystem().mkdirs(testDir);
-    BucketCache bucketCache =
-      new BucketCache("file:" + testDir + "/bucket.cache", capacitySize, constructedBlockSize,
-        constructedBlockSizes, writeThreads, writerQLen, testDir + "/bucket.persistence");
-    assertTrue(bucketCache.waitForCacheInitialization(10000));
-    long usedSize = bucketCache.getAllocator().getUsedSize();
-    assertEquals(0, usedSize);
-    Random rand = ThreadLocalRandom.current();
-    Path filePath = new Path(testDir, Long.toString(rand.nextLong()));
-    CacheTestUtils.HFileBlockPair[] blocks =
-      CacheTestUtils.generateBlocksForPath(constructedBlockSize, 1, filePath, false);
-    String name = blocks[0].getBlockName().getHfileName();
-    assertEquals(name, filePath.getName());
-    assertNotNull(blocks[0].getBlockName().getRegionName());
-    bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
-    waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
-    assertTrue(FilePathStringPool.getInstance().size() > 0);
-    bucketCache.evictBlock(blocks[0].getBlockName());
-    assertTrue(FilePathStringPool.getInstance().size() > 0);
-    bucketCache.cacheBlock(blocks[0].getBlockName(), blocks[0].getBlock());
-    waitUntilFlushedToBucket(bucketCache, blocks[0].getBlockName());
-    bucketCache.fileCacheCompleted(filePath,
-      bucketCache.backingMap.get(blocks[0].getBlockName()).getLength());
-    bucketCache.evictBlocksByHfileName(name);
-    assertEquals(1, FilePathStringPool.getInstance().size());
-  }
-
   @Test
   public void testCacheBlockNextBlockMetadataMissing() throws Exception {
     int size = 100;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
deleted file mode 100644
index a42a61c93fbd..000000000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFilePathStringPool.java
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Before;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Tests for {@link FilePathStringPool}
- */
-@Category({ SmallTests.class })
-public class TestFilePathStringPool {
-
-  @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE =
-    HBaseClassTestRule.forClass(TestFilePathStringPool.class);
-
-  private FilePathStringPool pool;
-
-  @Before
-  public void setUp() {
-    pool = FilePathStringPool.getInstance();
-    pool.clear();
-  }
-
-  @Test
-  public void testSingletonPattern() {
-    FilePathStringPool instance1 = FilePathStringPool.getInstance();
-    FilePathStringPool instance2 = FilePathStringPool.getInstance();
-    assertNotNull(instance1);
-    assertNotNull(instance2);
-    assertEquals(instance1, instance2);
-  }
-
-  @Test
-  public void testBasicEncodeDecodeRoundTrip() {
-    String testString = "/hbase/data/default/test-table/region1/cf1/file1.hfile";
-    int id = pool.encode(testString);
-    String decoded = pool.decode(id);
-    assertEquals(testString, decoded);
-  }
-
-  @Test
-  public void testEncodeReturnsSameIdForSameString() {
-    String testString = "/hbase/data/file.hfile";
-    int id1 = pool.encode(testString);
-    int id2 = pool.encode(testString);
-    assertEquals(id1, id2);
-    assertEquals(1, pool.size());
-  }
-
-  @Test
-  public void testEncodeDifferentStringsGetDifferentIds() {
-    String string1 = "/path/to/file1.hfile";
-    String string2 = "/path/to/file2.hfile";
-    int id1 = pool.encode(string1);
-    int id2 = pool.encode(string2);
-    assertNotEquals(id1, id2);
-    assertEquals(2, pool.size());
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testEncodeNullStringThrowsException() {
-    pool.encode(null);
-  }
-
-  @Test
-  public void testDecodeNonExistentIdReturnsNull() {
-    String decoded = pool.decode(999999);
-    assertNull(decoded);
-  }
-
-  @Test
-  public void testContainsWithId() {
-    String testString = "/hbase/file.hfile";
-    int id = pool.encode(testString);
-    assertTrue(pool.contains(id));
-    assertFalse(pool.contains(id + 1));
-  }
-
-  @Test
-  public void testContainsWithString() {
-    String testString = "/hbase/file.hfile";
-    pool.encode(testString);
-    assertTrue(pool.contains(testString));
-    assertFalse(pool.contains("/hbase/other-file.hfile"));
-  }
-
-  @Test
-  public void testRemoveExistingString() {
-    String testString = "/hbase/file.hfile";
-    int id = pool.encode(testString);
-    assertEquals(1, pool.size());
-    assertTrue(pool.contains(testString));
-    boolean removed = pool.remove(testString);
-    assertTrue(removed);
-    assertEquals(0, pool.size());
-    assertFalse(pool.contains(testString));
-    assertFalse(pool.contains(id));
-    assertNull(pool.decode(id));
-  }
-
-  @Test
-  public void testRemoveNonExistentStringReturnsFalse() {
-    boolean removed = pool.remove("/non/existent/file.hfile");
-    assertFalse(removed);
-  }
-
-  @Test
-  public void testRemoveNullStringReturnsFalse() {
-    boolean removed = pool.remove(null);
-    assertFalse(removed);
-  }
-
-  @Test
-  public void testClear() {
-    pool.encode("/file1.hfile");
-    pool.encode("/file2.hfile");
-    pool.encode("/file3.hfile");
-    assertEquals(3, pool.size());
-    pool.clear();
-    assertEquals(0, pool.size());
-  }
-
-  @Test
-  public void testSizeTracking() {
-    assertEquals(0, pool.size());
-    pool.encode("/file1.hfile");
-    assertEquals(1, pool.size());
-    pool.encode("/file2.hfile");
-    assertEquals(2, pool.size());
-    // Encoding same string should not increase size
-    pool.encode("/file1.hfile");
-    assertEquals(2, pool.size());
-    pool.remove("/file1.hfile");
-    assertEquals(1, pool.size());
-    pool.clear();
-    assertEquals(0, pool.size());
-  }
-
-  @Test
-  public void testGetPoolStats() {
-    String stats = pool.getPoolStats();
-    assertEquals("No strings encoded", stats);
-    pool.encode("/hbase/data/table1/file1.hfile");
-    pool.encode("/hbase/data/table2/file2.hfile");
-    stats = pool.getPoolStats();
-    assertNotNull(stats);
-    assertTrue(stats.contains("2 unique strings"));
-    assertTrue(stats.contains("avg length:"));
-  }
-
-  @Test
-  public void testConcurrentEncoding() throws InterruptedException {
-    int numThreads = 10;
-    int stringsPerThread = 100;
-    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-    CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    ConcurrentHashMap<String, Integer> results = new ConcurrentHashMap<>();
-    AtomicInteger errorCount = new AtomicInteger(0);
-
-    for (int t = 0; t < numThreads; t++) {
-      final int threadId = t;
-      executor.submit(() -> {
-        try {
-          for (int i = 0; i < stringsPerThread; i++) {
-            String string = "/thread" + threadId + "/file" + i + ".hfile";
-            int id = pool.encode(string);
-            results.put(string, id);
-          }
-        } catch (Exception e) {
-          errorCount.incrementAndGet();
-        } finally {
-          doneLatch.countDown();
-        }
-      });
-    }
-
-    assertTrue(doneLatch.await(30, TimeUnit.SECONDS));
-    executor.shutdown();
-
-    assertEquals(0, errorCount.get());
-    assertEquals(numThreads * stringsPerThread, pool.size());
-    assertEquals(numThreads * stringsPerThread, results.size());
-
-    // Verify all strings can be decoded correctly
-    for (Map.Entry<String, Integer> entry : results.entrySet()) {
-      String decoded = pool.decode(entry.getValue());
-      assertEquals(entry.getKey(), decoded);
-    }
-  }
-
-  @Test
-  public void testConcurrentEncodingSameStrings() throws InterruptedException {
-    int numThreads = 20;
-    String sharedString = "/shared/file.hfile";
-    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-    CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    Set<Integer> ids = ConcurrentHashMap.newKeySet();
-    AtomicInteger errorCount = new AtomicInteger(0);
-
-    for (int i = 0; i < numThreads; i++) {
-      executor.submit(() -> {
-        try {
-          int id = pool.encode(sharedString);
-          ids.add(id);
-        } catch (Exception e) {
-          errorCount.incrementAndGet();
-        } finally {
-          doneLatch.countDown();
-        }
-      });
-    }
-
-    doneLatch.await(10, TimeUnit.SECONDS);
-    executor.shutdown();
-
-    assertEquals(0, errorCount.get());
-    // All threads should get the same ID
-    assertEquals(1, ids.size());
-    assertEquals(1, pool.size());
-  }
-
-  @Test
-  public void testConcurrentRemoval() throws InterruptedException {
-    // Pre-populate with strings
-    List<String> strings = new ArrayList<>();
-    for (int i = 0; i < 100; i++) {
-      String string = "/file" + i + ".hfile";
-      strings.add(string);
-      pool.encode(string);
-    }
-    assertEquals(100, pool.size());
-
-    int numThreads = 10;
-    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-    CountDownLatch doneLatch = new CountDownLatch(numThreads);
-    AtomicInteger successfulRemovals = new AtomicInteger(0);
-
-    for (int t = 0; t < numThreads; t++) {
-      final int threadId = t;
-      executor.submit(() -> {
-        try {
-          for (int i = threadId * 10; i < (threadId + 1) * 10; i++) {
-            if (pool.remove(strings.get(i))) {
-              successfulRemovals.incrementAndGet();
-            }
-          }
-        } catch (Exception e) {
-          // Ignore
-        } finally {
-          doneLatch.countDown();
-        }
-      });
-    }
-
-    doneLatch.await(10, TimeUnit.SECONDS);
-    executor.shutdown();
-
-    assertEquals(100, successfulRemovals.get());
-    assertEquals(0, pool.size());
-  }
-
-  @Test
-  public void testBidirectionalMappingConsistency() {
-    // Verify that both mappings stay consistent
-    List<String> strings = new ArrayList<>();
-    List<Integer> ids = new ArrayList<>();
-
-    for (int i = 0; i < 50; i++) {
-      String string = "/region" + (i % 5) + "/file" + i + ".hfile";
-      strings.add(string);
-      ids.add(pool.encode(string));
-    }
-
-    // Verify forward mapping (string -> id)
-    for (int i = 0; i < strings.size(); i++) {
-      int expectedId = ids.get(i);
-      int actualId = pool.encode(strings.get(i));
-      assertEquals(expectedId, actualId);
-    }
-
-    // Verify reverse mapping (id -> string)
-    for (int i = 0; i < ids.size(); i++) {
-      String expectedString = strings.get(i);
-      String actualString = pool.decode(ids.get(i));
-      assertEquals(expectedString, actualString);
-    }
-  }
-}
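[Editor's note] Taken together, the patch replaces an ID-indirection API with plain interning. A hedged before/after sketch of the call-site shape follows; the old calls appear only as comments since FilePathStringPool no longer exists after this patch, and the field name mirrors the BlockCacheKey changes above:

    import org.apache.hadoop.hbase.util.FastStringPool;

    public class MigrationSketch {
      private static final FastStringPool HFILE_NAME_POOL = new FastStringPool();

      public static void main(String[] args) {
        String hfileName = new String("ns/table/region/cf/hfile-42");

        // Old shape: map the string to an int, decode it back on every getter call,
        // and remember to remove()/clear() entries by hand on eviction and shutdown:
        //   int id = FilePathStringPool.getInstance().encode(hfileName);
        //   String name = FilePathStringPool.getInstance().decode(id);
        //   FilePathStringPool.getInstance().remove(hfileName);

        // New shape: store the canonical String directly. Reads are plain field
        // access, and entries disappear once the last key referencing them is GCed.
        String canonical = HFILE_NAME_POOL.intern(hfileName);
        System.out.println(canonical);
      }
    }

The design trade is explicit lifecycle management (and its eviction-time accounting, deleted from BucketCache above) for weak references plus amortized queue draining, at the cost of one WeakReference object per live pooled string.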