diff --git a/README.md b/README.md index 9388108..4e6b713 100644 --- a/README.md +++ b/README.md @@ -518,6 +518,8 @@ See the `examples/middleware` directory for examples of using IP configuration. SRouter provides a flexible rate limiting system using `common.RateLimitConfig` (defined in `pkg/common/types.go`) that can be configured at the global, sub-router, or route level. Rate limits can be based on IP address, authenticated user, or custom criteria. Under the hood, SRouter uses [Uber's ratelimit library](https://github.com/uber-go/ratelimit) via the `middleware.RateLimit` function for efficient and smooth rate limiting with a leaky bucket algorithm. +`UberRateLimiter` stores its per-key rate limiters in an in-memory LRU cache to bound memory usage. The cache size defaults to `10,000` entries and can be customized via `NewUberRateLimiterWithMax`. + #### Rate Limiting Configuration ```go diff --git a/docs/security.md b/docs/security.md index c2c8e84..447b012 100644 --- a/docs/security.md +++ b/docs/security.md @@ -79,13 +79,14 @@ The rate limiting middleware in SRouter supports several strategies for identify ### Memory Consumption Considerations -SRouter's default rate limiter, `UberRateLimiter` (which uses `go.uber.org/ratelimit`), stores rate limiting state in memory for each unique key (e.g., each IP address or user ID). +SRouter's default rate limiter, `UberRateLimiter` (which uses `go.uber.org/ratelimit`), stores rate limiting state in memory for each unique key (e.g., each IP address or user ID). To prevent unbounded growth, it keeps these limiters in an in-memory LRU cache with a configurable maximum size (default `10,000` entries). 
* **High Cardinality Warning:** If your application anticipates a very large number of unique keys (e.g., millions of different IP addresses making requests over a short period, or a vast number of user accounts being actively rate-limited simultaneously) without application restarts, this in-memory storage can lead to significant memory consumption. * **Suitability:** The current `UberRateLimiter` is well-suited for many common use cases, especially for applications with a moderate number of active users or for services deployed behind a load balancer that already handles a significant portion of traffic. * **Alternatives for Extreme Scale:** For extremely high-traffic scenarios with very high cardinality of rate limiting keys, or if you require persistence of rate limiting state across application restarts or a distributed environment, consider: + * Adjusting the LRU cache size via `NewUberRateLimiterWithMax` when memory constraints or key cardinality require it. * Using an external rate-limiting solution (e.g., Redis-based, or dedicated proxy/API gateway rate limiters). - * Implementing a custom `RateLimiter` for SRouter that uses a backend store with eviction policies (e.g., LRU cache, Redis with TTLs) to manage memory usage. + * Implementing a custom `RateLimiter` for SRouter that uses a backend store with eviction policies (e.g., Redis with TTLs) to manage memory usage. ### Standard Rate Limit Headers diff --git a/pkg/middleware/ratelimit.go b/pkg/middleware/ratelimit.go index 2461af0..5c02da2 100644 --- a/pkg/middleware/ratelimit.go +++ b/pkg/middleware/ratelimit.go @@ -2,6 +2,7 @@ package middleware import ( + "container/list" "errors" // Added for error handling "fmt" "net/http" @@ -22,36 +23,68 @@ import ( // by allowing a steady flow of requests while preventing bursts. // The implementation maintains a map of rate limiters, one per unique key. 
type UberRateLimiter struct { - limiters sync.Map // map[string]ratelimit.Limiter + mu sync.Mutex + limiters map[string]*list.Element // LRU cache mapping + lru *list.List // order of keys, most recent at front + maxEntries int } -// NewUberRateLimiter creates a new UberRateLimiter instance. -// The returned limiter uses the leaky bucket algorithm to enforce rate limits. -// It maintains separate rate limiters for different keys (e.g., different IPs or users). +type limiterEntry struct { + key string + limiter ratelimit.Limiter +} + +const defaultLimiterCacheSize = 10000 + +// NewUberRateLimiter creates a new UberRateLimiter instance using a default cache size. +// The returned limiter uses the leaky bucket algorithm to enforce rate limits and +// keeps recently used limiters in an LRU cache to bound memory usage. func NewUberRateLimiter() *UberRateLimiter { - return &UberRateLimiter{} + return NewUberRateLimiterWithMax(defaultLimiterCacheSize) +} + +// NewUberRateLimiterWithMax creates a new UberRateLimiter instance with a +// maximum number of limiter entries to keep in memory. If maxEntries is <= 0 +// a reasonable default is used. +func NewUberRateLimiterWithMax(maxEntries int) *UberRateLimiter { + if maxEntries <= 0 { + maxEntries = defaultLimiterCacheSize + } + return &UberRateLimiter{ + limiters: make(map[string]*list.Element), + lru: list.New(), + maxEntries: maxEntries, + } } // getLimiter gets or creates a limiter for the given key and rate (requests per second). // It uses a composite key including the RPS to handle different rate limits for the same base key. func (u *UberRateLimiter) getLimiter(key string, rps int) ratelimit.Limiter { - compositeKey := fmt.Sprintf("%s-%d", key, rps) // Combine key and rps + compositeKey := fmt.Sprintf("%s-%d", key, rps) + + u.mu.Lock() + defer u.mu.Unlock() - // Fast path: Check if limiter already exists. 
- if limiter, ok := u.limiters.Load(compositeKey); ok { - return limiter.(ratelimit.Limiter) + if elem, ok := u.limiters[compositeKey]; ok { + u.lru.MoveToFront(elem) + return elem.Value.(*limiterEntry).limiter } - // Slow path: Limiter doesn't exist, create a new one. newLimiter := ratelimit.New(rps) - // Atomically load or store. - // - If compositeKey already exists (due to concurrent creation), LoadOrStore loads and returns the existing value. - // - If compositeKey doesn't exist, LoadOrStore stores newLimiter and returns it. - actualLimiter, _ := u.limiters.LoadOrStore(compositeKey, newLimiter) + // Evict oldest if we exceed the maximum size + if u.lru.Len() >= u.maxEntries { + if back := u.lru.Back(); back != nil { + ent := back.Value.(*limiterEntry) + delete(u.limiters, ent.key) + u.lru.Remove(back) + } + } - // Return the actual limiter stored in the map (either the existing one or the new one). - return actualLimiter.(ratelimit.Limiter) + ent := &limiterEntry{key: compositeKey, limiter: newLimiter} + elem := u.lru.PushFront(ent) + u.limiters[compositeKey] = elem + return newLimiter } // Ensure UberRateLimiter implements the common.RateLimiter interface. 
diff --git a/pkg/middleware/ratelimit_coverage_test.go b/pkg/middleware/ratelimit_coverage_test.go index ca51c8e..9e839f5 100644 --- a/pkg/middleware/ratelimit_coverage_test.go +++ b/pkg/middleware/ratelimit_coverage_test.go @@ -66,16 +66,16 @@ func TestGetLimiter_Coverage(t *testing.T) { // Check if the same limiter instance is returned across goroutines // This indirectly tests that the double-check prevents creating multiple limiters compositeKey := fmt.Sprintf("%s-%d", key, rps) - val, ok := limiter.limiters.Load(compositeKey) - // Explicitly check 'ok' before proceeding to prevent panic on nil interface conversion + limiter.mu.Lock() + elem, ok := limiter.limiters[compositeKey] + var currentLimiter ratelimit.Limiter + if ok { + currentLimiter = elem.Value.(*limiterEntry).limiter + } + limiter.mu.Unlock() if !assert.True(t, ok, "Limiter should exist in the map for key %s", compositeKey) { - // If the assertion fails (ok is false), skip the rest of the checks for this iteration - // as val will be nil, causing a panic on type assertion. - // This indicates a potential timing issue or problem in limiter creation/storage. 
- continue // Skip to the next iteration of the inner loop + continue } - // Only proceed if ok is true - currentLimiter := val.(ratelimit.Limiter) once.Do(func() { firstLimiter = currentLimiter // Capture the first successfully retrieved limiter @@ -91,12 +91,13 @@ func TestGetLimiter_Coverage(t *testing.T) { // Final check that only one limiter was created for the key/rps combination count := 0 - limiter.limiters.Range(func(k, v interface{}) bool { + limiter.mu.Lock() + for k := range limiter.limiters { if k == fmt.Sprintf("%s-%d", key, rps) { count++ } - return true - }) + } + limiter.mu.Unlock() assert.Equal(t, 1, count, "Expected exactly one limiter instance for the key/rps") } @@ -227,7 +228,7 @@ func TestUberRateLimiter_ForceNegativeRemaining(t *testing.T) { // Override the Allow method to force a negative remaining calculation // This directly tests the if remaining < 0 { remaining = 0 } code path - mockLimiter := &mockLimiterWithNegativeRemaining{} + mockLimiter := &mockLimiterWithNegativeRemaining{*NewUberRateLimiter()} // Create a test scenario where the calculation would result in negative remaining key := "negative-remaining-test" diff --git a/pkg/middleware/ratelimit_test.go b/pkg/middleware/ratelimit_test.go index 02a4937..109c86b 100644 --- a/pkg/middleware/ratelimit_test.go +++ b/pkg/middleware/ratelimit_test.go @@ -41,7 +41,9 @@ func TestUberRateLimiter(t *testing.T) { rps1 = 1 } compositeKey1 := fmt.Sprintf("%s-%d", key, rps1) - _, exists := limiter.limiters.Load(compositeKey1) + limiter.mu.Lock() + _, exists := limiter.limiters[compositeKey1] + limiter.mu.Unlock() if !exists { t.Errorf("Expected limiter to be stored for composite key %s", compositeKey1) } @@ -63,7 +65,9 @@ func TestUberRateLimiter(t *testing.T) { rps2 = 1 } compositeKey2 := fmt.Sprintf("%s-%d", otherKey, rps2) - _, exists = limiter.limiters.Load(compositeKey2) + limiter.mu.Lock() + _, exists = limiter.limiters[compositeKey2] + limiter.mu.Unlock() if !exists { t.Errorf("Expected 
limiter to be stored for composite key %s", compositeKey2) } @@ -85,7 +89,9 @@ func TestUberRateLimiter(t *testing.T) { rps3 = 1 } compositeKey3 := fmt.Sprintf("%s-%d", key, rps3) - _, exists = limiter.limiters.Load(compositeKey3) + limiter.mu.Lock() + _, exists = limiter.limiters[compositeKey3] + limiter.mu.Unlock() if !exists { t.Errorf("Expected limiter to be stored for composite key %s (different limit/window)", compositeKey3) } @@ -770,3 +776,45 @@ func TestRateLimitMiddlewareDefaultStrategy(t *testing.T) { t.Errorf("Expected Retry-After header to be set") } } + +func TestUberRateLimiterEviction(t *testing.T) { + limiter := NewUberRateLimiterWithMax(2) + key1 := "k1" + key2 := "k2" + key3 := "k3" + limit := 1 + window := time.Second + + limiter.Allow(key1, limit, window) + limiter.Allow(key2, limit, window) + + limiter.mu.Lock() + if len(limiter.limiters) != 2 { + t.Fatalf("expected 2 limiters, got %d", len(limiter.limiters)) + } + limiter.mu.Unlock() + + limiter.Allow(key3, limit, window) + + rps := int(float64(limit) / window.Seconds()) + c1 := fmt.Sprintf("%s-%d", key1, rps) + c2 := fmt.Sprintf("%s-%d", key2, rps) + c3 := fmt.Sprintf("%s-%d", key3, rps) + + limiter.mu.Lock() + _, ok1 := limiter.limiters[c1] + _, ok2 := limiter.limiters[c2] + _, ok3 := limiter.limiters[c3] + count := len(limiter.limiters) + limiter.mu.Unlock() + + if count != 2 { + t.Fatalf("limiter count expected 2, got %d", count) + } + if ok1 { + t.Errorf("oldest entry was not evicted") + } + if !ok2 || !ok3 { + t.Errorf("expected newest entries to remain") + } +}