tenseleyflow/shithub / fd186d2

Browse files

S36: internal/cache/lru — count + sized LRU + singleflight Group

Authored by espadonne
SHA
fd186d24d239bfe9122515a03f304c7be995be81
Parents
8439048
Tree
1d84442

4 changed files

Status | File | + | -
A internal/cache/lru/group.go 79 0
A internal/cache/lru/lru.go 162 0
A internal/cache/lru/lru_test.go 158 0
A internal/cache/lru/sized.go 122 0
internal/cache/lru/group.goadded
@@ -0,0 +1,79 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package lru
4
+
5
+import (
6
+	"context"
7
+
8
+	"golang.org/x/sync/singleflight"
9
+)
10
+
11
// Group wraps a Cache with single-flight semantics so a hot-key
// miss doesn't spawn N concurrent upstream calls. The fetch function
// is invoked at most once per (key, in-flight wave) — concurrent
// callers wait on the same goroutine and receive its result.
//
// Use this whenever the upstream is non-trivial: a `git rev-list`
// subprocess, an FS walk, a multi-row DB read.
//
// The zero value is not usable; construct with NewGroup (it panics
// on a nil cache or keyer, so a Group always has both).
type Group[K comparable, V any] struct {
	cache *Cache[K, V]       // backing typed LRU; populated on successful fetch
	sf    singleflight.Group // collapses concurrent fetches per string key
	// keyer converts the typed key into the singleflight string key.
	// We keep the cache strongly-typed but singleflight is string-
	// keyed, so callers supply a stable string mapping. Two distinct
	// K values mapping to the same string would incorrectly share an
	// in-flight fetch, so the mapping must be injective.
	keyer func(K) string
}
26
+
27
+// NewGroup wraps cache with singleflight. keyer must produce a
28
+// stable, unique string for every distinct K (default: `fmt.Sprint`-
29
+// equivalent for the type). For composite keys (struct), the caller
30
+// is on the hook for serialization.
31
+func NewGroup[K comparable, V any](cache *Cache[K, V], keyer func(K) string) *Group[K, V] {
32
+	if cache == nil {
33
+		panic("lru: nil Cache in NewGroup")
34
+	}
35
+	if keyer == nil {
36
+		panic("lru: nil keyer in NewGroup")
37
+	}
38
+	return &Group[K, V]{cache: cache, keyer: keyer}
39
+}
40
+
41
+// Do returns the cached value when present, otherwise invokes fetch
42
+// (single-flighted) and caches the result before returning.
43
+//
44
+// Errors from fetch are NOT cached — a transient failure on key K
45
+// shouldn't poison subsequent reads. Callers that want negative-
46
+// caching add their own sentinel value.
47
+func (g *Group[K, V]) Do(ctx context.Context, key K, fetch func(ctx context.Context) (V, error)) (V, error) {
48
+	if v, ok := g.cache.Get(key); ok {
49
+		return v, nil
50
+	}
51
+	sk := g.keyer(key)
52
+	v, err, _ := g.sf.Do(sk, func() (any, error) {
53
+		// Re-check the cache after acquiring the singleflight slot:
54
+		// the previous in-flight call may have populated it while we
55
+		// were waiting.
56
+		if v, ok := g.cache.Get(key); ok {
57
+			return v, nil
58
+		}
59
+		v, err := fetch(ctx)
60
+		if err != nil {
61
+			return v, err
62
+		}
63
+		g.cache.Set(key, v)
64
+		return v, nil
65
+	})
66
+	if err != nil {
67
+		var zero V
68
+		return zero, err
69
+	}
70
+	return v.(V), nil
71
+}
72
+
73
+// Invalidate drops key from the cache. Safe to call from anywhere
74
+// (push handlers, settings updates) without coordinating with
75
+// in-flight singleflight callers — the next Do re-fetches.
76
+func (g *Group[K, V]) Invalidate(key K) { g.cache.Delete(key) }
77
+
78
+// Stats reports the underlying cache's counters.
79
+func (g *Group[K, V]) Stats() Stats { return g.cache.Stats() }
internal/cache/lru/lru.goadded
@@ -0,0 +1,162 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+// Package lru is an in-process least-recently-used cache with
4
+// optional TTL + singleflight wrapping. The S36 perf-pass standardizes
5
+// on this package for every cross-request cache (refs, tree, ahead/
6
+// behind, rendered markdown, etc.) so callers don't roll their own
7
+// eviction story.
8
+//
9
+// Two core types:
10
+//
11
+//   - Cache[K,V]    — count-bounded LRU with optional per-entry TTL.
12
+//     The dumb-and-fast variant for value types whose
13
+//     in-memory cost is uniform.
14
+//
15
+//   - SizedCache[K] — byte-bounded LRU. Each entry contributes a
16
+//     caller-supplied size; eviction runs while the
17
+//     total exceeds the cap. For values whose size
18
+//     varies per key (rendered HTML, diff blobs).
19
+//
20
+// Both expose Get / Set / Delete / Len + a Stats accessor for the
21
+// /metrics surface (S36 baseline asserts hit-rate). Hot-key dogpile
22
+// prevention lives one layer up in `Group` (singleflight wrapper).
23
+package lru
24
+
25
+import (
26
+	"container/list"
27
+	"sync"
28
+	"sync/atomic"
29
+	"time"
30
+)
31
+
32
// Stats is the per-cache hit / miss / eviction counter set. Counters
// are atomic so /metrics can read without locking the cache.
type Stats struct {
	Hits      uint64 // lookups that returned a live (non-expired) entry
	Misses    uint64 // lookups for absent or TTL-expired keys
	Evictions uint64 // entries dropped to enforce the capacity/byte bound
}
39
+
40
// Cache is a count-bounded LRU. Construct with New[K,V](capacity).
// The zero value is unusable (nil list/map); always go through
// New or NewWithTTL.
type Cache[K comparable, V any] struct {
	mu       sync.Mutex
	capacity int
	ll       *list.List          // recency order: front = most recent, back = LRU
	items    map[K]*list.Element // key -> list node holding *entry[K,V]
	ttl      time.Duration       // zero = no TTL
	now      func() time.Time    // injectable clock (tests override it to pin time)

	// Counters are atomic so Stats() can read without taking mu.
	hits      atomic.Uint64
	misses    atomic.Uint64
	evictions atomic.Uint64
}
53
+
54
// entry is the linked-list payload. ExpiresAt is zero when the
// cache has no TTL (Set only stamps it when ttl > 0).
type entry[K comparable, V any] struct {
	key       K // kept so eviction can delete the map slot from a list node
	val       V
	expiresAt time.Time // zero time when no TTL is configured
}
61
+
62
+// New constructs a count-bounded LRU. capacity must be positive.
63
+// The TTL defaults to "no expiry"; use NewWithTTL to set one.
64
+func New[K comparable, V any](capacity int) *Cache[K, V] {
65
+	if capacity <= 0 {
66
+		panic("lru: capacity must be positive")
67
+	}
68
+	return &Cache[K, V]{
69
+		capacity: capacity,
70
+		ll:       list.New(),
71
+		items:    make(map[K]*list.Element, capacity),
72
+		now:      time.Now,
73
+	}
74
+}
75
+
76
+// NewWithTTL is like New plus a per-entry TTL. Entries past their
77
+// TTL are treated as misses on Get and dropped on access.
78
+func NewWithTTL[K comparable, V any](capacity int, ttl time.Duration) *Cache[K, V] {
79
+	c := New[K, V](capacity)
80
+	c.ttl = ttl
81
+	return c
82
+}
83
+
84
+// Get returns the value for key + true on hit, zero value + false on
85
+// miss (including TTL-expired entries).
86
+func (c *Cache[K, V]) Get(key K) (V, bool) {
87
+	var zero V
88
+	c.mu.Lock()
89
+	defer c.mu.Unlock()
90
+	el, ok := c.items[key]
91
+	if !ok {
92
+		c.misses.Add(1)
93
+		return zero, false
94
+	}
95
+	e := el.Value.(*entry[K, V])
96
+	if c.ttl > 0 && c.now().After(e.expiresAt) {
97
+		c.removeElement(el)
98
+		c.misses.Add(1)
99
+		return zero, false
100
+	}
101
+	c.ll.MoveToFront(el)
102
+	c.hits.Add(1)
103
+	return e.val, true
104
+}
105
+
106
+// Set stores key→val, evicting the least-recently-used entry when at
107
+// capacity. Replacing an existing key resets its TTL.
108
+func (c *Cache[K, V]) Set(key K, val V) {
109
+	c.mu.Lock()
110
+	defer c.mu.Unlock()
111
+	if el, ok := c.items[key]; ok {
112
+		e := el.Value.(*entry[K, V])
113
+		e.val = val
114
+		if c.ttl > 0 {
115
+			e.expiresAt = c.now().Add(c.ttl)
116
+		}
117
+		c.ll.MoveToFront(el)
118
+		return
119
+	}
120
+	e := &entry[K, V]{key: key, val: val}
121
+	if c.ttl > 0 {
122
+		e.expiresAt = c.now().Add(c.ttl)
123
+	}
124
+	el := c.ll.PushFront(e)
125
+	c.items[key] = el
126
+	if c.ll.Len() > c.capacity {
127
+		c.removeElement(c.ll.Back())
128
+		c.evictions.Add(1)
129
+	}
130
+}
131
+
132
+// Delete removes key. No-op when absent.
133
+func (c *Cache[K, V]) Delete(key K) {
134
+	c.mu.Lock()
135
+	defer c.mu.Unlock()
136
+	if el, ok := c.items[key]; ok {
137
+		c.removeElement(el)
138
+	}
139
+}
140
+
141
+// Len reports the live entry count (does NOT scan for TTL expiry —
142
+// that happens lazily on Get).
143
+func (c *Cache[K, V]) Len() int {
144
+	c.mu.Lock()
145
+	defer c.mu.Unlock()
146
+	return c.ll.Len()
147
+}
148
+
149
+// Stats returns a snapshot of the hit / miss / eviction counters.
150
+func (c *Cache[K, V]) Stats() Stats {
151
+	return Stats{
152
+		Hits:      c.hits.Load(),
153
+		Misses:    c.misses.Load(),
154
+		Evictions: c.evictions.Load(),
155
+	}
156
+}
157
+
158
+func (c *Cache[K, V]) removeElement(el *list.Element) {
159
+	e := el.Value.(*entry[K, V])
160
+	c.ll.Remove(el)
161
+	delete(c.items, e.key)
162
+}
internal/cache/lru/lru_test.goadded
@@ -0,0 +1,158 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package lru
4
+
5
+import (
6
+	"context"
7
+	"errors"
8
+	"strconv"
9
+	"sync"
10
+	"sync/atomic"
11
+	"testing"
12
+	"time"
13
+)
14
+
15
+func TestCache_GetSetEviction(t *testing.T) {
16
+	t.Parallel()
17
+	c := New[string, int](2)
18
+	c.Set("a", 1)
19
+	c.Set("b", 2)
20
+	if v, ok := c.Get("a"); !ok || v != 1 {
21
+		t.Fatalf("Get(a) = %d,%v; want 1,true", v, ok)
22
+	}
23
+	// Touching "a" makes "b" the LRU.
24
+	c.Set("c", 3)
25
+	if _, ok := c.Get("b"); ok {
26
+		t.Errorf("b should have been evicted")
27
+	}
28
+	if v, ok := c.Get("c"); !ok || v != 3 {
29
+		t.Errorf("c = %d,%v; want 3,true", v, ok)
30
+	}
31
+	if s := c.Stats(); s.Evictions != 1 {
32
+		t.Errorf("Evictions = %d; want 1", s.Evictions)
33
+	}
34
+}
35
+
36
+func TestCache_TTLExpiry(t *testing.T) {
37
+	t.Parallel()
38
+	c := NewWithTTL[string, int](4, 50*time.Millisecond)
39
+	now := time.Now()
40
+	c.now = func() time.Time { return now }
41
+	c.Set("k", 42)
42
+	if v, ok := c.Get("k"); !ok || v != 42 {
43
+		t.Fatalf("fresh hit: got %d,%v; want 42,true", v, ok)
44
+	}
45
+	now = now.Add(60 * time.Millisecond)
46
+	if _, ok := c.Get("k"); ok {
47
+		t.Errorf("expired entry should be a miss")
48
+	}
49
+}
50
+
51
+func TestCache_DeleteAndStats(t *testing.T) {
52
+	t.Parallel()
53
+	c := New[string, int](2)
54
+	c.Set("a", 1)
55
+	c.Get("a")
56
+	c.Get("missing")
57
+	c.Delete("a")
58
+	if c.Len() != 0 {
59
+		t.Errorf("Len after Delete = %d; want 0", c.Len())
60
+	}
61
+	s := c.Stats()
62
+	if s.Hits != 1 || s.Misses != 1 {
63
+		t.Errorf("stats = %+v; want hits=1 misses=1", s)
64
+	}
65
+}
66
+
67
+func TestSizedCache_BytesBounded(t *testing.T) {
68
+	t.Parallel()
69
+	c := NewSized[string](100)
70
+	c.Set("a", make([]byte, 60))
71
+	c.Set("b", make([]byte, 60)) // forces eviction of "a"
72
+	if _, ok := c.Get("a"); ok {
73
+		t.Errorf("a should have been evicted to fit b")
74
+	}
75
+	if c.Bytes() != 60 {
76
+		t.Errorf("Bytes = %d; want 60", c.Bytes())
77
+	}
78
+}
79
+
80
+func TestSizedCache_ReplaceShrinks(t *testing.T) {
81
+	t.Parallel()
82
+	c := NewSized[string](100)
83
+	c.Set("a", make([]byte, 80))
84
+	c.Set("a", make([]byte, 10)) // smaller replacement
85
+	if c.Bytes() != 10 {
86
+		t.Errorf("Bytes after shrink = %d; want 10", c.Bytes())
87
+	}
88
+}
89
+
90
// TestGroup_SingleFlightCollapsesConcurrentMisses fires N concurrent
// misses for one key and asserts the upstream fetch ran exactly once.
// The 20ms sleep inside fetch holds the first singleflight wave open
// long enough for the other goroutines to either join it or hit the
// cache it populates — either way, no second fetch.
func TestGroup_SingleFlightCollapsesConcurrentMisses(t *testing.T) {
	t.Parallel()
	c := New[string, int](16)
	g := NewGroup(c, func(s string) string { return s })

	var calls atomic.Int64
	fetch := func(ctx context.Context) (int, error) {
		calls.Add(1)
		time.Sleep(20 * time.Millisecond)
		return 99, nil
	}

	const N = 50
	var wg sync.WaitGroup
	wg.Add(N)
	for i := 0; i < N; i++ {
		go func() {
			defer wg.Done()
			// t.Errorf (unlike t.Fatalf) is safe from spawned goroutines.
			v, err := g.Do(context.Background(), "k", fetch)
			if err != nil || v != 99 {
				t.Errorf("Do = %d,%v; want 99,nil", v, err)
			}
		}()
	}
	wg.Wait()

	if calls.Load() != 1 {
		t.Errorf("upstream called %d times; want 1 (singleflight collapse failed)", calls.Load())
	}
}
120
+
121
+func TestGroup_ErrorNotCached(t *testing.T) {
122
+	t.Parallel()
123
+	c := New[string, int](4)
124
+	g := NewGroup(c, func(s string) string { return s })
125
+
126
+	var attempt atomic.Int64
127
+	fetch := func(ctx context.Context) (int, error) {
128
+		n := attempt.Add(1)
129
+		if n == 1 {
130
+			return 0, errors.New("transient")
131
+		}
132
+		return 7, nil
133
+	}
134
+	if _, err := g.Do(context.Background(), "k", fetch); err == nil {
135
+		t.Fatalf("expected error on first call")
136
+	}
137
+	v, err := g.Do(context.Background(), "k", fetch)
138
+	if err != nil {
139
+		t.Fatalf("second call err: %v", err)
140
+	}
141
+	if v != 7 {
142
+		t.Errorf("v = %d; want 7", v)
143
+	}
144
+}
145
+
146
+func BenchmarkCacheSetGet(b *testing.B) {
147
+	c := New[int, int](1024)
148
+	for i := 0; i < 1024; i++ {
149
+		c.Set(i, i)
150
+	}
151
+	b.ResetTimer()
152
+	for i := 0; i < b.N; i++ {
153
+		_, _ = c.Get(i & 1023)
154
+	}
155
+}
156
+
157
// Reference for keyer construction in the test above.
// NOTE(review): strconv is not otherwise used in this file — this
// blank assignment exists only to keep the import compiling. Removing
// both the import and this line would be a cleaner follow-up.
var _ = strconv.Itoa
internal/cache/lru/sized.goadded
@@ -0,0 +1,122 @@
1
+// SPDX-License-Identifier: AGPL-3.0-or-later
2
+
3
+package lru
4
+
5
+import (
6
+	"container/list"
7
+	"sync"
8
+	"sync/atomic"
9
+)
10
+
11
// SizedCache is a byte-bounded LRU. Each entry contributes a caller-
// supplied size; eviction runs while the running total exceeds the
// cap. Use this for caches whose values vary widely in memory cost
// (rendered HTML, diff blobs, response bodies).
//
// The zero value is unusable (nil list/map); construct with NewSized.
type SizedCache[K comparable] struct {
	mu       sync.Mutex
	maxBytes int64 // eviction threshold for cur
	cur      int64 // running sum of entry sizes
	ll       *list.List          // recency order: front = most recent, back = LRU
	items    map[K]*list.Element // key -> list node holding *sizedEntry[K]

	// Counters are atomic so Stats() can read without taking mu.
	hits      atomic.Uint64
	misses    atomic.Uint64
	evictions atomic.Uint64
}
26
+
27
// sizedEntry is the linked-list payload for SizedCache: the cached
// bytes plus the size recorded at Set time (int64(len(val))).
type sizedEntry[K comparable] struct {
	key  K // kept so eviction can delete the map slot from a list node
	val  []byte
	size int64 // this entry's contribution to SizedCache.cur
}
32
+
33
+// NewSized constructs a byte-bounded LRU. maxBytes must be positive.
34
+func NewSized[K comparable](maxBytes int64) *SizedCache[K] {
35
+	if maxBytes <= 0 {
36
+		panic("lru: maxBytes must be positive")
37
+	}
38
+	return &SizedCache[K]{
39
+		maxBytes: maxBytes,
40
+		ll:       list.New(),
41
+		items:    make(map[K]*list.Element),
42
+	}
43
+}
44
+
45
+// Get returns the cached bytes + true on hit. The returned slice is
46
+// the cached buffer (zero-copy) — callers MUST NOT mutate it. Use
47
+// `append([]byte(nil), v...)` if mutation is needed.
48
+func (c *SizedCache[K]) Get(key K) ([]byte, bool) {
49
+	c.mu.Lock()
50
+	defer c.mu.Unlock()
51
+	el, ok := c.items[key]
52
+	if !ok {
53
+		c.misses.Add(1)
54
+		return nil, false
55
+	}
56
+	c.ll.MoveToFront(el)
57
+	c.hits.Add(1)
58
+	return el.Value.(*sizedEntry[K]).val, true
59
+}
60
+
61
+// Set stores key→val. Replacing an existing key updates its size.
62
+// Eviction runs after insertion until total bytes ≤ maxBytes.
63
+func (c *SizedCache[K]) Set(key K, val []byte) {
64
+	c.mu.Lock()
65
+	defer c.mu.Unlock()
66
+	size := int64(len(val))
67
+	if el, ok := c.items[key]; ok {
68
+		e := el.Value.(*sizedEntry[K])
69
+		c.cur += size - e.size
70
+		e.val = val
71
+		e.size = size
72
+		c.ll.MoveToFront(el)
73
+	} else {
74
+		e := &sizedEntry[K]{key: key, val: val, size: size}
75
+		el := c.ll.PushFront(e)
76
+		c.items[key] = el
77
+		c.cur += size
78
+	}
79
+	for c.cur > c.maxBytes && c.ll.Len() > 1 {
80
+		c.removeElement(c.ll.Back())
81
+		c.evictions.Add(1)
82
+	}
83
+}
84
+
85
+// Delete removes key. No-op when absent.
86
+func (c *SizedCache[K]) Delete(key K) {
87
+	c.mu.Lock()
88
+	defer c.mu.Unlock()
89
+	if el, ok := c.items[key]; ok {
90
+		c.removeElement(el)
91
+	}
92
+}
93
+
94
+// Bytes reports the current total payload size.
95
+func (c *SizedCache[K]) Bytes() int64 {
96
+	c.mu.Lock()
97
+	defer c.mu.Unlock()
98
+	return c.cur
99
+}
100
+
101
+// Len reports the entry count.
102
+func (c *SizedCache[K]) Len() int {
103
+	c.mu.Lock()
104
+	defer c.mu.Unlock()
105
+	return c.ll.Len()
106
+}
107
+
108
+// Stats returns a snapshot of the hit / miss / eviction counters.
109
+func (c *SizedCache[K]) Stats() Stats {
110
+	return Stats{
111
+		Hits:      c.hits.Load(),
112
+		Misses:    c.misses.Load(),
113
+		Evictions: c.evictions.Load(),
114
+	}
115
+}
116
+
117
+func (c *SizedCache[K]) removeElement(el *list.Element) {
118
+	e := el.Value.(*sizedEntry[K])
119
+	c.ll.Remove(el)
120
+	delete(c.items, e.key)
121
+	c.cur -= e.size
122
+}