Skip to content

Commit a454523

Browse files
alexluongclaude
andcommitted
feat: reflect disabled state in consecutive failure alert at threshold 100
After disabling a destination, update the consecutive failure alert's destination to include DisabledAt, so consumers see the post-disable state. Add test asserting this behavior. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 6c1c082 commit a454523

3 files changed

Lines changed: 95 additions & 14 deletions

File tree

internal/alert/monitor.go

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -150,19 +150,8 @@ func (m *alertMonitor) HandleAttempt(ctx context.Context, attempt DeliveryAttemp
150150
return nil
151151
}
152152

153-
alert := NewConsecutiveFailureAlert(ConsecutiveFailureData{
154-
TenantID: attempt.Destination.TenantID,
155-
Attempt: attempt.Attempt,
156-
Event: attempt.Event,
157-
Destination: attempt.Destination,
158-
ConsecutiveFailures: ConsecutiveFailures{
159-
Current: count,
160-
Max: m.autoDisableFailureCount,
161-
Threshold: level,
162-
},
163-
})
164-
165153
// If we've hit 100% and have a disabler configured, disable the destination
154+
alertDest := attempt.Destination
166155
if level == 100 && m.disabler != nil {
167156
disabledDest, err := m.disabler.DisableDestination(ctx, attempt.Destination.TenantID, attempt.Destination.ID)
168157
if err != nil {
@@ -173,17 +162,20 @@ func (m *alertMonitor) HandleAttempt(ctx context.Context, attempt DeliveryAttemp
173162
}
174163

175164
m.logger.Ctx(ctx).Audit("destination disabled",
165+
zap.String("attempt_id", attempt.Attempt.ID),
176166
zap.String("event_id", attempt.Event.ID),
177167
zap.String("tenant_id", attempt.Destination.TenantID),
178168
zap.String("destination_id", attempt.Destination.ID),
179169
zap.String("destination_type", attempt.Destination.Type),
180170
)
181171

172+
alertDest = AlertDestinationFromDestination(&disabledDest)
173+
182174
// Send destination disabled alert (best-effort, don't fail on notification error)
183175
if m.notifier != nil {
184176
disabledAlert := NewDestinationDisabledAlert(DestinationDisabledData{
185177
TenantID: attempt.Destination.TenantID,
186-
Destination: AlertDestinationFromDestination(&disabledDest),
178+
Destination: alertDest,
187179
DisabledAt: *disabledDest.DisabledAt,
188180
Reason: "consecutive_failure",
189181
Attempt: attempt.Attempt,
@@ -199,6 +191,18 @@ func (m *alertMonitor) HandleAttempt(ctx context.Context, attempt DeliveryAttemp
199191
}
200192
}
201193

194+
alert := NewConsecutiveFailureAlert(ConsecutiveFailureData{
195+
TenantID: attempt.Destination.TenantID,
196+
Attempt: attempt.Attempt,
197+
Event: attempt.Event,
198+
Destination: alertDest,
199+
ConsecutiveFailures: ConsecutiveFailures{
200+
Current: count,
201+
Max: m.autoDisableFailureCount,
202+
Threshold: level,
203+
},
204+
})
205+
202206
// Send alert if notifier is configured (best-effort, don't fail on notification error)
203207
if m.notifier != nil {
204208
if err := m.notifier.Notify(ctx, alert); err != nil {

internal/alert/monitor_test.go

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,13 @@ func TestAlertMonitor_ConsecutiveFailures_MaxFailures(t *testing.T) {
8484
consecutiveFailureCount++
8585
cf := cfAlert.Data.ConsecutiveFailures
8686
require.Contains(t, []int{10, 14, 18, 20}, cf.Current, "Alert should be sent at 50%, 66%, 90%, and 100% thresholds")
87-
require.Equal(t, dest, cfAlert.Data.Destination)
87+
require.Equal(t, dest.ID, cfAlert.Data.Destination.ID)
88+
require.Equal(t, dest.TenantID, cfAlert.Data.Destination.TenantID)
89+
if cf.Threshold == 100 {
90+
require.NotNil(t, cfAlert.Data.Destination.DisabledAt, "Destination should have DisabledAt at threshold 100")
91+
} else {
92+
require.Nil(t, cfAlert.Data.Destination.DisabledAt, "Destination should not have DisabledAt below threshold 100")
93+
}
8894
require.Equal(t, "alert.destination.consecutive_failure", cfAlert.Topic)
8995
require.Equal(t, "attempt_1", cfAlert.Data.Attempt.ID)
9096
require.Equal(t, 20, cf.Max)
@@ -357,3 +363,68 @@ func TestAlertMonitor_SendsDestinationDisabledAlert(t *testing.T) {
357363
assert.Equal(t, event.ID, destinationDisabledAlert.Data.Event.ID, "Event ID should match")
358364
assert.Equal(t, event.Topic, destinationDisabledAlert.Data.Event.Topic, "Event Topic should match")
359365
}
366+
367+
func TestAlertMonitor_ConsecutiveFailureAlert_ReflectsDisabledDestination(t *testing.T) {
368+
// Tests that the consecutive failure alert at threshold 100 includes
369+
// the destination with DisabledAt set, reflecting the post-disable state.
370+
t.Parallel()
371+
ctx := context.Background()
372+
logger := testutil.CreateTestLogger(t)
373+
redisClient := testutil.CreateTestRedisClient(t)
374+
notifier := &mockAlertNotifier{}
375+
notifier.On("Notify", mock.Anything, mock.Anything).Return(nil)
376+
377+
disabledAt := time.Now()
378+
modelsDest := testutil.DestinationFactory.Any(
379+
testutil.DestinationFactory.WithID("dest_reflect"),
380+
testutil.DestinationFactory.WithTenantID("tenant_reflect"),
381+
)
382+
modelsDest.DisabledAt = &disabledAt
383+
disabler := &mockDestinationDisabler{}
384+
disabler.On("DisableDestination", mock.Anything, mock.Anything, mock.Anything).Return(modelsDest, nil)
385+
386+
autoDisableCount := 5
387+
monitor := alert.NewAlertMonitor(
388+
logger,
389+
redisClient,
390+
alert.WithNotifier(notifier),
391+
alert.WithDisabler(disabler),
392+
alert.WithAutoDisableFailureCount(autoDisableCount),
393+
alert.WithAlertThresholds([]int{100}),
394+
)
395+
396+
dest := &alert.AlertDestination{ID: "dest_reflect", TenantID: "tenant_reflect"}
397+
event := testutil.EventFactory.AnyPointer(
398+
testutil.EventFactory.WithID("event_reflect"),
399+
)
400+
attempt := alert.DeliveryAttempt{
401+
Event: event,
402+
Destination: dest,
403+
Attempt: testutil.AttemptFactory.AnyPointer(
404+
testutil.AttemptFactory.WithID("attempt_reflect"),
405+
testutil.AttemptFactory.WithStatus("failed"),
406+
testutil.AttemptFactory.WithCode("500"),
407+
),
408+
}
409+
410+
for i := 1; i <= autoDisableCount; i++ {
411+
require.NoError(t, monitor.HandleAttempt(ctx, attempt))
412+
}
413+
414+
// Find the consecutive failure alert at threshold 100
415+
var found bool
416+
for _, call := range notifier.Calls {
417+
if call.Method == "Notify" {
418+
if cfAlert, ok := call.Arguments.Get(1).(alert.ConsecutiveFailureAlert); ok {
419+
if cfAlert.Data.ConsecutiveFailures.Threshold == 100 {
420+
found = true
421+
// The destination in the alert should reflect the disabled state
422+
require.NotNil(t, cfAlert.Data.Destination.DisabledAt, "Destination in consecutive failure alert at threshold 100 should have DisabledAt set")
423+
assert.Equal(t, disabledAt, *cfAlert.Data.Destination.DisabledAt, "DisabledAt should match")
424+
break
425+
}
426+
}
427+
}
428+
}
429+
require.True(t, found, "Should have found a consecutive failure alert at threshold 100")
430+
}

internal/util/testutil/event.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ func (f *mockAttemptFactory) WithStatus(status string) func(*models.Attempt) {
163163
}
164164
}
165165

166+
func (f *mockAttemptFactory) WithCode(code string) func(*models.Attempt) {
167+
return func(attempt *models.Attempt) {
168+
attempt.Code = code
169+
}
170+
}
171+
166172
func (f *mockAttemptFactory) WithTime(time time.Time) func(*models.Attempt) {
167173
return func(attempt *models.Attempt) {
168174
attempt.Time = time

0 commit comments

Comments
 (0)