Skip to content

Commit e602965

Browse files
committed
Add additional PrometheusRule for health statuses
1 parent 50d51d1 commit e602965

1 file changed

Lines changed: 59 additions & 10 deletions

File tree

controllers/argocd_metrics_controller.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -582,13 +582,6 @@ func newServiceMonitor(namespace, name, matchLabel string) *monitoringv1.Service
582582
}
583583

584584
func newPrometheusRule(namespace string) *monitoringv1.PrometheusRule {
585-
// The namespace used in the alert rule is not the namespace of the
586-
// running application, it is the namespace that the corresponding
587-
// ArgoCD application metadata was created in. This is needed to
588-
// scope this alert rule to only fire for applications managed
589-
// by the ArgoCD instance installed in this namespace.
590-
expr := fmt.Sprintf("argocd_app_info{namespace=\"%s\",sync_status=\"OutOfSync\"} > 0", namespace)
591-
592585
objectMeta := metav1.ObjectMeta{
593586
Name: alertRuleName,
594587
Namespace: namespace,
@@ -602,17 +595,73 @@ func newPrometheusRule(namespace string) *monitoringv1.PrometheusRule {
602595
Alert: "ArgoCDSyncAlert",
603596
Annotations: map[string]string{
604597
"summary": "Argo CD application is out of sync",
605-
"description": "Argo CD application {{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.",
598+
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is out of sync. Check ArgoCDSyncAlert status, this alert is designed to notify that an application managed by Argo CD is out of sync.",
606599
},
607600
Expr: intstr.IntOrString{
608-
Type: intstr.String,
609-
StrVal: expr,
601+
Type: intstr.String,
602+
// The namespace used in the alert rule is not the namespace of the
603+
// running application, it is the namespace that the corresponding
604+
// ArgoCD application metadata was created in. This is needed to
605+
// scope this alert rule to only fire for applications managed
606+
// by the ArgoCD instance installed in this namespace.
607+
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\",sync_status=\"OutOfSync\"} > 0", namespace),
610608
},
611609
For: "5m",
612610
Labels: map[string]string{
613611
"severity": "warning",
614612
},
615613
},
614+
{
615+
Alert: "ArgoCDHealthAlert",
616+
Annotations: map[string]string{
617+
"summary": "Argo CD application is not healthy",
618+
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is not healthy. Check ArgoCDHealthAlert status, this alert is designed to notify that an application managed by Argo CD is not in a healthy, suspended, progressing or degraded state.",
619+
},
620+
Expr: intstr.IntOrString{
621+
Type: intstr.String,
622+
// General warning of not healthy, this ignores the status of Healthy and
623+
// Suspended which are expected statuses. Degraded and Progressing are
624+
// handled by other rules below
625+
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\", health_status!~\"Healthy|Suspended|Progressing|Degraded\"} > 0", namespace),
626+
},
627+
For: "5m",
628+
Labels: map[string]string{
629+
"severity": "warning",
630+
},
631+
},
632+
{
633+
Alert: "ArgoCDDegradedAlert",
634+
Annotations: map[string]string{
635+
"summary": "Argo CD application is degraded",
636+
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} is degraded. Check ArgoCDDegradedAlert status, this alert is designed to notify that an application managed by Argo CD is degraded.",
637+
},
638+
Expr: intstr.IntOrString{
639+
Type: intstr.String,
640+
// Specific warning of degraded state
641+
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\", health_status=\"Degraded\"} > 0", namespace),
642+
},
643+
For: "5m",
644+
Labels: map[string]string{
645+
"severity": "critical",
646+
},
647+
},
648+
{
649+
Alert: "ArgoCDProgressingAlert",
650+
Annotations: map[string]string{
651+
"summary": "Argo CD application has been progressing for more than 10 minutes",
652+
"description": "Argo CD application {{ $labels.namespace }}/{{ $labels.name }} has been progressing for more than 10 minutes. Check ArgoCDProgressingAlert status, this alert is designed to notify when an application is taking a long time to exit the Progressing state.",
653+
},
654+
Expr: intstr.IntOrString{
655+
Type: intstr.String,
656+
// This rule is used to notify when an application is stuck in the progressing
657+
// state for more than 10m.
658+
StrVal: fmt.Sprintf("argocd_app_info{namespace=\"%s\", health_status=\"Progressing\"} > 0", namespace),
659+
},
660+
For: "10m",
661+
Labels: map[string]string{
662+
"severity": "warning",
663+
},
664+
},
616665
},
617666
},
618667
},

0 commit comments

Comments
 (0)