Skip to content

Commit 477a5d6

Browse files
author
Joel Jeske
committed
[metrics] add lru evict atime metric
1 parent 4323662 commit 477a5d6

File tree

2 files changed

+68
-13
lines changed

2 files changed

+68
-13
lines changed

cache/disk/disk.go

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"strconv"
2121
"strings"
2222
"sync"
23+
"time"
2324

2425
"github.com/buchgr/bazel-remote/cache"
2526
"github.com/buchgr/bazel-remote/cache/disk/casblob"
@@ -31,6 +32,8 @@ import (
3132
pb "github.com/buchgr/bazel-remote/genproto/build/bazel/remote/execution/v2"
3233
"google.golang.org/protobuf/proto"
3334

35+
"github.com/prometheus/client_golang/prometheus"
36+
3437
"golang.org/x/sync/semaphore"
3538
)
3639

@@ -83,6 +86,8 @@ type diskCache struct {
8386

8487
mu sync.Mutex
8588
lru SizedLRU
89+
90+
gaugeCacheAge prometheus.Gauge
8691
}
8792

8893
type nameAndInfo struct {
@@ -135,6 +140,11 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
135140
// I suppose it's better to slow down processing than to crash
136141
// when hitting the 10k limit or to run out of disk space.
137142
fileRemovalSem: semaphore.NewWeighted(5000),
143+
144+
gaugeCacheAge: prometheus.NewGauge(prometheus.GaugeOpts{
145+
Name: "bazel_remote_disk_cache_age_seconds",
146+
Help: "The file `atime` of oldest item in the LRU cache. Depending on filemount opts (e.g. relatime), the resolution may be meausured in 'days' and not accurate to the second",
147+
}),
138148
}
139149

140150
cc := CacheConfig{diskCache: &c}
@@ -143,19 +153,7 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
143153
// This function is only called while the lock is held
144154
// by the current goroutine.
145155
onEvict := func(key Key, value lruItem) {
146-
ks := key.(string)
147-
hash := ks[len(ks)-sha256.Size*2:]
148-
var kind cache.EntryKind = cache.AC
149-
if strings.HasPrefix(ks, "cas") {
150-
kind = cache.CAS
151-
} else if strings.HasPrefix(ks, "ac") {
152-
kind = cache.AC
153-
} else if strings.HasPrefix(ks, "raw") {
154-
kind = cache.RAW
155-
}
156-
157-
f := filepath.Join(dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))
158-
156+
f := c.getElementPath(key, value)
159157
// Run in a goroutine so we can release the lock sooner.
160158
go c.removeFile(f)
161159
}
@@ -211,6 +209,56 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
211209
// RegisterMetrics calls the LRU's own RegisterMetrics, registers the cache
// age gauge with prometheus and starts the goroutine that keeps that gauge
// up to date.
//
// Non-test users must call this to expose metrics.
212210
func (c *diskCache) RegisterMetrics() {
213211
c.lru.RegisterMetrics()
212+
213+
prometheus.MustRegister(c.gaugeCacheAge)
214+
215+
// Update the cache age metric on a static interval.
216+
// Note: this could be modeled as a GaugeFunc that updates as needed,
217+
// but since the updater func must lock the cache mu, it was deemed
218+
// necessary to have greater control of when to get the cache age.
219+
go c.pollCacheAge()
220+
}
221+
222+
// Update metric every minute with the current age of the cache
223+
func (c *diskCache) pollCacheAge() {
224+
ticker := time.NewTicker(60 * time.Second)
225+
for range ticker.C {
226+
c.updateCacheAgeMetric()
227+
}
228+
}
229+
230+
// Get the back item in the cache, and store its atime in a metric
231+
func (c *diskCache) updateCacheAgeMetric() {
232+
c.mu.Lock()
233+
234+
key, value := c.lru.PeakOldestCacheItem()
235+
f := c.getElementPath(key, value)
236+
ts, err := atime.Stat(f)
237+
238+
c.mu.Unlock()
239+
240+
if err != nil {
241+
log.Printf("ERROR: failed to determine cache age: %v, unable to stat %s", err, f)
242+
c.gaugeCacheAge.Set(0.0)
243+
} else {
244+
age := time.Now().Sub(ts).Seconds()
245+
c.gaugeCacheAge.Set(age)
246+
}
247+
}
248+
249+
func (c *diskCache) getElementPath(key Key, value lruItem) string {
250+
ks := key.(string)
251+
hash := ks[len(ks)-sha256.Size*2:]
252+
var kind cache.EntryKind = cache.AC
253+
if strings.HasPrefix(ks, "cas") {
254+
kind = cache.CAS
255+
} else if strings.HasPrefix(ks, "ac") {
256+
kind = cache.AC
257+
} else if strings.HasPrefix(ks, "raw") {
258+
kind = cache.RAW
259+
}
260+
261+
return filepath.Join(c.dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))
214262
}
215263

216264
func (c *diskCache) removeFile(f string) {

cache/disk/lru.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,3 +280,10 @@ func (c *SizedLRU) removeElement(e *list.Element) {
280280
func roundUp4k(n int64) int64 {
281281
return (n + BlockSize - 1) & -BlockSize
282282
}
283+
284+
// Get the back item of the LRU cache.
285+
func (c *SizedLRU) PeakOldestCacheItem() (Key, lruItem) {
286+
ele := c.ll.Back()
287+
kv := ele.Value.(*entry)
288+
return kv.key, kv.value
289+
}

0 commit comments

Comments
 (0)