Skip to content

Commit 8eee3af

Browse files
authored
fix: remove project label from projectstorage metrics to reduce cardinality (#558)
## Summary

- Remove the `project` label from all three `projectstorage_*` metrics

## Problem

The `project` label creates a cardinality explosion in VictoriaMetrics production storage (datum-cloud/infra#2113). PVCs are at 93% capacity.

| Metric | Before (per pod) | After (per pod) |
|--------|-----------------|-----------------|
| `projectstorage_first_ready_seconds` | 414 × 82 × 12 = 407K series | 82 × 12 = 984 series |
| `projectstorage_child_creations_total` | 414 × 82 = 34K series | 82 series |
| `projectstorage_reinitializing_errors_total` | 414 × 82 × 7 = 237K series | 82 × 7 = 574 series |
| **Total** | **~678K / pod, ~6.1M across 9 pods** | **~1.6K / pod, ~14K across 9 pods** |

**~430x reduction in cardinality.**

## What changed

- Removed `project` from label dimensions on all three metrics
- Removed `project` field from `instrumentedStorage` struct
- Updated `recordFirstReady`, `incrReinit`, and `childCreations.WithLabelValues` call sites

The distribution by `resource_group` and `resource_kind` is the useful signal for understanding storage init performance. Per-project granularity is not actionable and is the source of the cardinality problem.

## Test plan

- [ ] `go build ./internal/apiserver/storage/project/` passes
- [ ] Deploy to staging, verify metrics still emit with reduced labels
- [ ] Confirm VictoriaMetrics series count drops after old series expire
2 parents f4a6dd1 + d54ebef commit 8eee3af

1 file changed

Lines changed: 17 additions & 19 deletions

File tree

  • internal/apiserver/storage/project

internal/apiserver/storage/project/mux.go

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,10 @@ var (
3535
childCreations = k8smetrics.NewCounterVec(
3636
&k8smetrics.CounterOpts{
3737
Name: "projectstorage_child_creations_total",
38-
Help: "Per-project child storage creations",
38+
Help: "Child storage creations by resource type",
3939
StabilityLevel: k8smetrics.ALPHA,
4040
},
41-
[]string{"project", "resource_group", "resource_kind"},
41+
[]string{"resource_group", "resource_kind"},
4242
)
4343

4444
firstReady = k8smetrics.NewHistogramVec(
@@ -48,7 +48,7 @@ var (
4848
Buckets: []float64{0.02, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
4949
StabilityLevel: k8smetrics.ALPHA,
5050
},
51-
[]string{"project", "resource_group", "resource_kind"},
51+
[]string{"resource_group", "resource_kind"},
5252
)
5353

5454
reinitErrors = k8smetrics.NewCounterVec(
@@ -57,7 +57,7 @@ var (
5757
Help: "Ops that hit 'storage is (re)initializing'",
5858
StabilityLevel: k8smetrics.ALPHA,
5959
},
60-
[]string{"project", "resource_group", "resource_kind", "verb"},
60+
[]string{"resource_group", "resource_kind", "verb"},
6161
)
6262
)
6363

@@ -69,13 +69,13 @@ func isReinitErr(err error) bool {
6969
return err != nil && strings.Contains(err.Error(), "storage is (re)initializing")
7070
}
7171

72-
func incrReinit(project, group, kind, verb string) {
73-
reinitErrors.WithLabelValues(project, group, kind, verb).Inc()
72+
func incrReinit(group, kind, verb string) {
73+
reinitErrors.WithLabelValues(group, kind, verb).Inc()
7474
}
7575

76-
func recordFirstReady(c *child, project, group, kind string) {
76+
func recordFirstReady(c *child, group, kind string) {
7777
c.readyOnce.Do(func() {
78-
firstReady.WithLabelValues(project, group, kind).
78+
firstReady.WithLabelValues(group, kind).
7979
Observe(time.Since(c.created).Seconds())
8080
})
8181
}
@@ -107,21 +107,20 @@ type decoratorArgs struct {
107107

108108
// instrumentedStorage wraps a storage.Interface to emit metrics once per child
109109
type instrumentedStorage struct {
110-
inner storage.Interface
111-
child *child
112-
project string
110+
inner storage.Interface
111+
child *child
113112

114113
// normalized labels
115114
group string // API group ("" => "core" when you query; we keep "" here)
116115
kind string // resource plural
117116
}
118117

119118
func (i *instrumentedStorage) markSuccess() {
120-
recordFirstReady(i.child, i.project, i.group, i.kind)
119+
recordFirstReady(i.child, i.group, i.kind)
121120
}
122121
func (i *instrumentedStorage) markReinit(verb string, err error) error {
123122
if isReinitErr(err) {
124-
incrReinit(i.project, i.group, i.kind, verb)
123+
incrReinit(i.group, i.kind, verb)
125124
}
126125
return err
127126
}
@@ -239,16 +238,15 @@ func (m *projectMux) childForProject(project string) (storage.Interface, error)
239238
// Wrap the child once with instrumentation.
240239
c := &child{s: s, destroy: destroy, created: time.Now()}
241240
wrapped := &instrumentedStorage{
242-
inner: s,
243-
child: c,
244-
project: project,
245-
group: m.args.resourceGroup,
246-
kind: m.args.resourceKind,
241+
inner: s,
242+
child: c,
243+
group: m.args.resourceGroup,
244+
kind: m.args.resourceKind,
247245
}
248246
c.s = wrapped
249247

250248
m.children[project] = c
251-
childCreations.WithLabelValues(project, m.args.resourceGroup, m.args.resourceKind).Inc()
249+
childCreations.WithLabelValues(m.args.resourceGroup, m.args.resourceKind).Inc()
252250

253251
// Bootstrap system namespace synchronously to prevent resource creation failures
254252
if project != "" && m.loopbackConfig != nil {

0 commit comments

Comments
 (0)