Skip to content

Commit 7346008

Browse files
committed
Add metadata TTL and stale-while-revalidate support
Cached metadata is now served directly within a configurable TTL window (default 5m) without contacting upstream, reducing latency and upstream load. When upstream is unreachable and the cache is past its TTL, stale content is served with a Warning: 110 header per RFC 7234. New config: `metadata_ttl` (YAML) / `PROXY_METADATA_TTL` (env). Set to "0" to always revalidate with upstream.
1 parent c01f0a5 commit 7346008

7 files changed

Lines changed: 303 additions & 22 deletions

File tree

cmd/proxy/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ func runMirror() {
453453
resolver := fetch.NewResolver()
454454
proxy := handler.NewProxy(db, store, fetcher, resolver, logger)
455455
proxy.CacheMetadata = true // mirror always caches metadata
456+
proxy.MetadataTTL = cfg.ParseMetadataTTL()
456457

457458
m := mirror.New(proxy, db, store, logger, *concurrency)
458459

docs/configuration.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,20 @@ Or via environment variable: `PROXY_CACHE_METADATA=true`.
225225

226226
The `proxy mirror` command always enables metadata caching regardless of this setting.
227227

228+
### Metadata TTL
229+
230+
When metadata caching is enabled, `metadata_ttl` controls how long a cached response is considered fresh before the proxy revalidates it with upstream. The value uses Go duration syntax (for example `30s`, `5m`, or `1h`). During the TTL window, cached metadata is served directly without contacting upstream, reducing latency and upstream load.
231+
232+
```yaml
233+
metadata_ttl: "5m" # default
234+
```
235+
236+
Or via environment variable: `PROXY_METADATA_TTL=10m`.
237+
238+
Set to `"0"` to always revalidate with upstream (ETag-based conditional requests still avoid re-downloading unchanged content).
239+
240+
When upstream is unreachable and the cached entry is past its TTL, the proxy serves the stale cached copy with a `Warning: 110 - "Response is Stale"` header so clients can tell the data may be outdated.
241+
228242
## Mirror API
229243

230244
The `/api/mirror` endpoints are disabled by default. Enable them to allow starting mirror jobs via HTTP:

internal/config/config.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ import (
5555
"path/filepath"
5656
"strconv"
5757
"strings"
58+
"time"
5859

5960
"gopkg.in/yaml.v3"
6061
)
@@ -89,6 +90,11 @@ type Config struct {
8990
// The mirror command always enables this regardless of this setting.
9091
CacheMetadata bool `json:"cache_metadata" yaml:"cache_metadata"`
9192

93+
// MetadataTTL is how long cached metadata is considered fresh before
94+
// revalidating with upstream. Uses Go duration syntax (e.g. "5m", "1h").
95+
// Default: "5m". Set to "0" to always revalidate.
96+
MetadataTTL string `json:"metadata_ttl" yaml:"metadata_ttl"`
97+
9298
// MirrorAPI enables the /api/mirror endpoints for starting mirror jobs via HTTP.
9399
// Disabled by default to prevent unauthenticated users from triggering downloads.
94100
MirrorAPI bool `json:"mirror_api" yaml:"mirror_api"`
@@ -321,6 +327,9 @@ func (c *Config) LoadFromEnv() {
321327
if v := os.Getenv("PROXY_MIRROR_API"); v != "" {
322328
c.MirrorAPI = v == "true" || v == "1"
323329
}
330+
if v := os.Getenv("PROXY_METADATA_TTL"); v != "" {
331+
c.MetadataTTL = v
332+
}
324333
}
325334

326335
// Validate checks the configuration for errors.
@@ -370,9 +379,34 @@ func (c *Config) Validate() error {
370379
}
371380
}
372381

382+
// Validate metadata TTL if specified
383+
if c.MetadataTTL != "" && c.MetadataTTL != "0" {
384+
if _, err := time.ParseDuration(c.MetadataTTL); err != nil {
385+
return fmt.Errorf("invalid metadata_ttl %q: %w", c.MetadataTTL, err)
386+
}
387+
}
388+
373389
return nil
374390
}
375391

392+
const defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default
393+
394+
// ParseMetadataTTL returns the metadata TTL duration.
395+
// Returns 5 minutes if unset, 0 if explicitly disabled.
396+
func (c *Config) ParseMetadataTTL() time.Duration {
397+
if c.MetadataTTL == "" {
398+
return defaultMetadataTTL
399+
}
400+
if c.MetadataTTL == "0" {
401+
return 0
402+
}
403+
d, err := time.ParseDuration(c.MetadataTTL)
404+
if err != nil {
405+
return defaultMetadataTTL
406+
}
407+
return d
408+
}
409+
376410
// ParseSize parses a human-readable size string (e.g., "10GB", "500MB").
377411
// Returns the size in bytes.
378412
func ParseSize(s string) (int64, error) {

internal/config/config_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"os"
55
"path/filepath"
66
"testing"
7+
"time"
78
)
89

910
const (
@@ -301,3 +302,56 @@ func TestLoadFileNotFound(t *testing.T) {
301302
t.Error("expected error for nonexistent file")
302303
}
303304
}
305+
306+
func TestParseMetadataTTL(t *testing.T) {
307+
tests := []struct {
308+
name string
309+
ttl string
310+
want time.Duration
311+
}{
312+
{"empty defaults to 5m", "", 5 * time.Minute},
313+
{"explicit zero", "0", 0},
314+
{"10 minutes", "10m", 10 * time.Minute},
315+
{"1 hour", "1h", 1 * time.Hour},
316+
{"invalid defaults to 5m", "not-a-duration", 5 * time.Minute},
317+
}
318+
319+
for _, tt := range tests {
320+
t.Run(tt.name, func(t *testing.T) {
321+
cfg := Default()
322+
cfg.MetadataTTL = tt.ttl
323+
got := cfg.ParseMetadataTTL()
324+
if got != tt.want {
325+
t.Errorf("ParseMetadataTTL() = %v, want %v", got, tt.want)
326+
}
327+
})
328+
}
329+
}
330+
331+
func TestValidateMetadataTTL(t *testing.T) {
332+
cfg := Default()
333+
cfg.MetadataTTL = "invalid"
334+
if err := cfg.Validate(); err == nil {
335+
t.Error("expected validation error for invalid metadata_ttl")
336+
}
337+
338+
cfg.MetadataTTL = "5m"
339+
if err := cfg.Validate(); err != nil {
340+
t.Errorf("unexpected error for valid metadata_ttl: %v", err)
341+
}
342+
343+
cfg.MetadataTTL = "0"
344+
if err := cfg.Validate(); err != nil {
345+
t.Errorf("unexpected error for zero metadata_ttl: %v", err)
346+
}
347+
}
348+
349+
func TestLoadMetadataTTLFromEnv(t *testing.T) {
350+
cfg := Default()
351+
t.Setenv("PROXY_METADATA_TTL", "10m")
352+
cfg.LoadFromEnv()
353+
354+
if cfg.MetadataTTL != "10m" {
355+
t.Errorf("MetadataTTL = %q, want %q", cfg.MetadataTTL, "10m")
356+
}
357+
}

internal/handler/handler.go

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ type Proxy struct {
6868
Logger *slog.Logger
6969
Cooldown *cooldown.Config
7070
CacheMetadata bool
71+
MetadataTTL time.Duration
7172
HTTPClient *http.Client
7273
}
7374

@@ -383,12 +384,31 @@ func (p *Proxy) FetchOrCacheMetadata(ctx context.Context, ecosystem, cacheKey, u
383384

384385
storagePath := metadataStoragePath(ecosystem, cacheKey)
385386

386-
// Check for existing cache entry (for ETag revalidation)
387+
// Check for existing cache entry (for ETag revalidation and TTL)
387388
var entry *database.MetadataCacheEntry
388389
if p.CacheMetadata && p.DB != nil {
389390
entry, _ = p.DB.GetMetadataCache(ecosystem, cacheKey)
390391
}
391392

393+
// Serve from cache if within TTL (skip upstream entirely)
394+
if entry != nil && p.MetadataTTL > 0 && entry.FetchedAt.Valid {
395+
if time.Since(entry.FetchedAt.Time) < p.MetadataTTL {
396+
cached, readErr := p.Storage.Open(ctx, entry.StoragePath)
397+
if readErr == nil {
398+
defer func() { _ = cached.Close() }()
399+
data, readErr := ReadMetadata(cached)
400+
if readErr == nil {
401+
ct := contentTypeJSON
402+
if entry.ContentType.Valid {
403+
ct = entry.ContentType.String
404+
}
405+
return data, ct, nil
406+
}
407+
}
408+
// Cache file missing/unreadable, fall through to upstream
409+
}
410+
}
411+
392412
accept := contentTypeJSON
393413
if len(acceptHeaders) > 0 && acceptHeaders[0] != "" {
394414
accept = acceptHeaders[0]
@@ -529,6 +549,37 @@ func (p *Proxy) cacheMetadataBlob(ctx context.Context, ecosystem, cacheKey, stor
529549
})
530550
}
531551

552+
// cachedMeta holds cache validators and freshness state from a metadata cache entry.
553+
type cachedMeta struct {
554+
etag string
555+
lastModified time.Time
556+
stale bool
557+
}
558+
559+
// lookupCachedMeta retrieves cache validators for a metadata entry.
560+
func (p *Proxy) lookupCachedMeta(ecosystem, cacheKey string) cachedMeta {
561+
if p.DB == nil {
562+
return cachedMeta{}
563+
}
564+
entry, err := p.DB.GetMetadataCache(ecosystem, cacheKey)
565+
if err != nil || entry == nil {
566+
return cachedMeta{}
567+
}
568+
var cm cachedMeta
569+
if entry.ETag.Valid {
570+
cm.etag = entry.ETag.String
571+
}
572+
if entry.LastModified.Valid {
573+
cm.lastModified = entry.LastModified.Time
574+
}
575+
// If FetchedAt is older than TTL, upstream must have failed and
576+
// we served from stale cache (successful fetches update FetchedAt).
577+
if p.MetadataTTL > 0 && entry.FetchedAt.Valid && time.Since(entry.FetchedAt.Time) > p.MetadataTTL {
578+
cm.stale = true
579+
}
580+
return cm
581+
}
582+
532583
// ProxyCached fetches metadata from upstream (with optional caching for offline fallback)
533584
// and writes it to the response. Optional acceptHeaders specify the Accept header to send.
534585
// When metadata caching is disabled, the response is streamed directly to avoid buffering
@@ -551,30 +602,18 @@ func (p *Proxy) ProxyCached(w http.ResponseWriter, r *http.Request, upstreamURL,
551602
return
552603
}
553604

554-
// Look up cache entry to get ETag and upstream Last-Modified for conditional response headers
555-
var etag string
556-
var lastModified time.Time
557-
if p.DB != nil {
558-
if entry, err := p.DB.GetMetadataCache(ecosystem, cacheKey); err == nil && entry != nil {
559-
if entry.ETag.Valid {
560-
etag = entry.ETag.String
561-
}
562-
if entry.LastModified.Valid {
563-
lastModified = entry.LastModified.Time
564-
}
565-
}
566-
}
605+
cm := p.lookupCachedMeta(ecosystem, cacheKey)
567606

568607
// Honor client conditional request headers
569-
if etag != "" {
570-
if match := r.Header.Get("If-None-Match"); match != "" && match == etag {
608+
if cm.etag != "" {
609+
if match := r.Header.Get("If-None-Match"); match != "" && match == cm.etag {
571610
w.WriteHeader(http.StatusNotModified)
572611
return
573612
}
574613
}
575-
if !lastModified.IsZero() {
614+
if !cm.lastModified.IsZero() {
576615
if ims := r.Header.Get("If-Modified-Since"); ims != "" {
577-
if t, err := http.ParseTime(ims); err == nil && !lastModified.After(t) {
616+
if t, err := http.ParseTime(ims); err == nil && !cm.lastModified.After(t) {
578617
w.WriteHeader(http.StatusNotModified)
579618
return
580619
}
@@ -583,11 +622,14 @@ func (p *Proxy) ProxyCached(w http.ResponseWriter, r *http.Request, upstreamURL,
583622

584623
w.Header().Set("Content-Type", contentType)
585624
w.Header().Set("Content-Length", strconv.Itoa(len(body)))
586-
if etag != "" {
587-
w.Header().Set("ETag", etag)
625+
if cm.etag != "" {
626+
w.Header().Set("ETag", cm.etag)
627+
}
628+
if !cm.lastModified.IsZero() {
629+
w.Header().Set("Last-Modified", cm.lastModified.UTC().Format(http.TimeFormat))
588630
}
589-
if !lastModified.IsZero() {
590-
w.Header().Set("Last-Modified", lastModified.UTC().Format(http.TimeFormat))
631+
if cm.stale {
632+
w.Header().Set("Warning", `110 - "Response is Stale"`)
591633
}
592634
w.WriteHeader(http.StatusOK)
593635
_, _ = w.Write(body)

0 commit comments

Comments
 (0)