diff --git a/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml b/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml
index dc435aa295..b63e0e53b8 100644
--- a/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml
+++ b/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml
@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
@@ -237,6 +239,279 @@ spec:
If no URL is provided, Alertmanager will point to the Google Cloud Metric Explorer page.
type: string
+ storage:
+ description: |-
+ Storage opts the managed Alertmanager into a PersistentVolumeClaim-backed
+ data directory. When unset, Alertmanager uses an ephemeral emptyDir volume
+ and all silences, notification log entries, and inhibitions are lost on
+ pod restart. When set, the operator creates a PVC in the operator
+ namespace and mounts it at the Alertmanager data path so this state
+ survives pod churn.
+
+ See https://github.com/GoogleCloudPlatform/prometheus-engine/issues/685.
+ properties:
+ volumeClaim:
+ description: |-
+ VolumeClaim describes the desired PersistentVolumeClaim. The
+ embedded structure exposes both `metadata` (so callers can attach
+ labels and annotations, e.g. for volume-snapshot tooling) and `spec`
+ (so every Kubernetes PVC field — accessModes, storageClassName,
+ resources, selector, volumeMode, dataSource, dataSourceRef — is
+ configurable). The operator overwrites the claim's name and
+ namespace; everything else is taken from the caller-provided spec
+ modulo Kubernetes-enforced immutability.
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ description: |-
+ EmbeddedObjectMetadata contains labels, annotations and finalizers
+ applied to the generated PersistentVolumeClaim. Other ObjectMeta
+ fields are ignored.
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Annotations applied to the generated resource. Useful for
+ integrations such as VolumeSnapshot controllers or storage-class
+ provisioners that read annotations from the claim.
+ type: object
+ finalizers:
+ description: |-
+ Finalizers applied to the generated resource on creation. The
+ operator does not strip user-managed finalizers it did not add, so
+ removing entries from this list does not remove them from the live
+ object.
+ items:
+ type: string
+ type: array
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Labels applied to the generated resource. Merged with the
+ operator's default labels; on conflict the operator's value wins.
+ type: object
+ type: object
+ spec:
+ description: |-
+ Spec defines the desired characteristics of the volume. At minimum,
+ `resources.requests.storage` must be set. See the Kubernetes
+ PersistentVolumeClaim documentation for the full field reference:
+ https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#persistentvolumeclaimspec-v1-core
+ properties:
+ accessModes:
+ description: |-
+ accessModes contains the desired access modes the volume should have.
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ dataSource:
+ description: |-
+ dataSource field can be used to specify either:
+ * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot)
+ * An existing PVC (PersistentVolumeClaim)
+ If the provisioner or an external controller can support the specified data source,
+ it will create a new volume based on the contents of the specified data source.
+ When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef,
+ and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified.
+ If the namespace is specified, then dataSourceRef will not be copied to dataSource.
+ properties:
+ apiGroup:
+ description: |-
+ APIGroup is the group for the resource being referenced.
+ If APIGroup is not specified, the specified Kind must be in the core API group.
+ For any other third-party types, APIGroup is required.
+ type: string
+ kind:
+ description: Kind is the type of resource being referenced
+ type: string
+ name:
+ description: Name is the name of resource being referenced
+ type: string
+ required:
+ - kind
+ - name
+ type: object
+ x-kubernetes-map-type: atomic
+ dataSourceRef:
+ description: |-
+ dataSourceRef specifies the object from which to populate the volume with data, if a non-empty
+ volume is desired. This may be any object from a non-empty API group (non
+ core object) or a PersistentVolumeClaim object.
+ When this field is specified, volume binding will only succeed if the type of
+ the specified object matches some installed volume populator or dynamic
+ provisioner.
+ This field will replace the functionality of the dataSource field and as such
+ if both fields are non-empty, they must have the same value. For backwards
+ compatibility, when namespace isn't specified in dataSourceRef,
+ both fields (dataSource and dataSourceRef) will be set to the same
+ value automatically if one of them is empty and the other is non-empty.
+ When namespace is specified in dataSourceRef,
+ dataSource isn't set to the same value and must be empty.
+ There are three important differences between dataSource and dataSourceRef:
+ * While dataSource only allows two specific types of objects, dataSourceRef
+ allows any non-core object, as well as PersistentVolumeClaim objects.
+ * While dataSource ignores disallowed values (dropping them), dataSourceRef
+ preserves all values, and generates an error if a disallowed value is
+ specified.
+ * While dataSource only allows local objects, dataSourceRef allows objects
+ in any namespaces.
+ (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled.
+ (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled.
+ properties:
+ apiGroup:
+ description: |-
+ APIGroup is the group for the resource being referenced.
+ If APIGroup is not specified, the specified Kind must be in the core API group.
+ For any other third-party types, APIGroup is required.
+ type: string
+ kind:
+ description: Kind is the type of resource being referenced
+ type: string
+ name:
+ description: Name is the name of resource being referenced
+ type: string
+ namespace:
+ description: |-
+ Namespace is the namespace of resource being referenced
+ Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details.
+ (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled.
+ type: string
+ required:
+ - kind
+ - name
+ type: object
+ resources:
+ description: |-
+ resources represents the minimum resources the volume should have.
+ If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements
+ that are lower than previous value but must still be higher than capacity recorded in the
+ status field of the claim.
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ selector:
+ description: selector is a label query over volumes to
+ consider for binding.
+ properties:
+ matchExpressions:
+ description: matchExpressions is a list of label selector
+ requirements. The requirements are ANDed.
+ items:
+ description: |-
+ A label selector requirement is a selector that contains values, a key, and an operator that
+ relates the key and values.
+ properties:
+ key:
+ description: key is the label key that the selector
+ applies to.
+ type: string
+ operator:
+ description: |-
+ operator represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists and DoesNotExist.
+ type: string
+ values:
+ description: |-
+ values is an array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. This array is replaced during a strategic
+ merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchLabels:
+ additionalProperties:
+ type: string
+ description: |-
+ matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+ map is equivalent to an element of matchExpressions, whose key field is "key", the
+ operator is "In", and the values array contains only "value". The requirements are ANDed.
+ type: object
+ type: object
+ x-kubernetes-map-type: atomic
+ storageClassName:
+ description: |-
+ storageClassName is the name of the StorageClass required by the claim.
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1
+ type: string
+ volumeAttributesClassName:
+ description: |-
+ volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
+ If specified, the CSI driver will create or update the volume with the attributes defined
+ in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
+ it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass
+ will be applied to the claim but it's not allowed to reset this field to empty string once it is set.
+ If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass
+ will be set by the persistentvolume controller if it exists.
+ If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be
+ set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource
+ exists.
+ More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/
+ (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled.
+ type: string
+ volumeMode:
+ description: |-
+ volumeMode defines what type of volume is required by the claim.
+ Value of Filesystem is implied when not included in claim spec.
+ type: string
+ volumeName:
+ description: volumeName is the binding reference to the
+ PersistentVolume backing this claim.
+ type: string
+ type: object
+ type: object
+ required:
+ - volumeClaim
+ type: object
type: object
metadata:
type: object
diff --git a/doc/api.md b/doc/api.md
index 9e576cfcc6..4a460b388d 100644
--- a/doc/api.md
+++ b/doc/api.md
@@ -52,6 +52,12 @@ Resource Types:
LabelMapping
+AlertmanagerStorageSpec
+
+EmbeddedObjectMetadata
+
+EmbeddedPersistentVolumeClaim
+
ManagedAlertmanagerSpec
MonitoringCRD
@@ -1373,6 +1379,189 @@ be derived automatically.
If no URL is provided, Alertmanager will point to the Google Cloud Metric Explorer page.
+
+
+storage
+
+
+AlertmanagerStorageSpec
+
+
+ |
+
+ Storage opts the managed Alertmanager into a PersistentVolumeClaim-backed
+data directory. When unset, Alertmanager uses an ephemeral emptyDir volume
+and all silences, notification log entries, and inhibitions are lost on
+pod restart. When set, the operator creates a PVC in the operator
+namespace and mounts it at the Alertmanager data path so this state
+survives pod churn.
+See issue #685.
+ |
+
+
+
+
+AlertmanagerStorageSpec
+
+
+
+(Appears in: ManagedAlertmanagerSpec)
+
+
+
AlertmanagerStorageSpec configures persistent storage for the managed
+Alertmanager. The operator provisions a single PersistentVolumeClaim named
+"alertmanager-data" in the operator namespace using the supplied spec and
+mounts it at the Alertmanager data path. The managed Alertmanager runs with
+a single replica, so a ReadWriteOnce access mode is sufficient; multi-replica
+support would require migrating to volumeClaimTemplates and is out of scope
+here.
+
Changing this spec after creation triggers a rolling restart of the
+Alertmanager StatefulSet. Most PersistentVolumeClaim fields are immutable
+once the claim is bound — only resources.requests.storage can be
+expanded (and only if the StorageClass allows volume expansion). The
+operator logs and ignores shrink requests and other mutations to
+immutable fields; the existing PVC must be deleted manually to fully
+reset (silences will be lost).
+
+
+
+
+| Field |
+Description |
+
+
+
+
+
+volumeClaim
+
+
+EmbeddedPersistentVolumeClaim
+
+
+ |
+
+ VolumeClaim describes the desired PersistentVolumeClaim. The embedded
+structure exposes both metadata (so callers can attach labels and
+annotations, e.g. for volume-snapshot tooling) and spec (so every
+Kubernetes PVC field — accessModes, storageClassName, resources, selector,
+volumeMode, dataSource, dataSourceRef — is configurable). The operator
+overwrites the claim's name and namespace; everything else is taken from
+the caller-provided spec modulo Kubernetes-enforced immutability.
+ |
+
+
+
+
+EmbeddedPersistentVolumeClaim
+
+
+
+(Appears in: AlertmanagerStorageSpec)
+
+
+
EmbeddedPersistentVolumeClaim is a PersistentVolumeClaim definition
+embedded directly in a parent resource's spec. It mirrors prometheus-
+operator's type of the same name so user-facing YAML feels familiar.
+
+
+
+
+| Field |
+Description |
+
+
+
+
+
+metadata
+
+
+EmbeddedObjectMetadata
+
+
+ |
+
+ EmbeddedObjectMetadata contains labels, annotations and finalizers
+applied to the generated PersistentVolumeClaim. Other ObjectMeta
+fields are ignored.
+ |
+
+
+
+spec
+
+
+Kubernetes core/v1.PersistentVolumeClaimSpec
+
+
+ |
+
+ Spec defines the desired characteristics of the volume. At minimum,
+resources.requests.storage must be set.
+ |
+
+
+
+
+
+(Appears in: EmbeddedPersistentVolumeClaim)
+
+
+
EmbeddedObjectMetadata is a subset of metav1.ObjectMeta containing only
+the fields that make sense to set on an operator-managed child resource.
+Setting name or namespace here has no effect — the operator owns
+those.
+
+
+
+
+| Field |
+Description |
+
+
+
+
+
+labels
+
+map[string]string
+
+ |
+
+ Labels applied to the generated resource. Merged with the operator's
+default labels; on conflict the operator's value wins.
+ |
+
+
+
+annotations
+
+map[string]string
+
+ |
+
+ Annotations applied to the generated resource. Useful for integrations
+such as VolumeSnapshot controllers or storage-class provisioners that
+read annotations from the claim.
+ |
+
+
+
+finalizers
+
+[]string
+
+ |
+
+ Finalizers applied to the generated resource on creation. The operator
+does not strip user-managed finalizers it did not add, so removing
+entries from this list does not remove them from the live object.
+ |
+
diff --git a/pkg/operator/apis/monitoring/v1/operator_types.go b/pkg/operator/apis/monitoring/v1/operator_types.go
index a3fb792188..4fbc7009c0 100644
--- a/pkg/operator/apis/monitoring/v1/operator_types.go
+++ b/pkg/operator/apis/monitoring/v1/operator_types.go
@@ -289,6 +289,87 @@ type ManagedAlertmanagerSpec struct {
//
// If no URL is provided, Alertmanager will point to the Google Cloud Metric Explorer page.
ExternalURL string `json:"externalURL,omitempty"`
+ // Storage opts the managed Alertmanager into a PersistentVolumeClaim-backed
+ // data directory. When unset, Alertmanager uses an ephemeral emptyDir volume
+ // and all silences, notification log entries, and inhibitions are lost on
+ // pod restart. When set, the operator creates a PVC in the operator
+ // namespace and mounts it at the Alertmanager data path so this state
+ // survives pod churn.
+ //
+ // See https://github.com/GoogleCloudPlatform/prometheus-engine/issues/685.
+ Storage *AlertmanagerStorageSpec `json:"storage,omitempty"`
+}
+
+// AlertmanagerStorageSpec configures persistent storage for the managed
+// Alertmanager. The operator provisions a single PersistentVolumeClaim named
+// "alertmanager-data" in the operator namespace using the supplied spec and
+// mounts it at the Alertmanager data path. The managed Alertmanager runs with
+// a single replica, so a ReadWriteOnce access mode is sufficient; multi-replica
+// support would require migrating to volumeClaimTemplates and is out of scope
+// here.
+//
+// Once the claim exists, the operator only reconciles its labels, its
+// annotations and an increase of `resources.requests.storage` (expansion
+// additionally requires a StorageClass that allows volume expansion). A
+// smaller storage request is logged and ignored, and changes to any other
+// claim spec field are not applied to the existing claim; to change those
+// the existing PVC must be deleted manually (silences will be lost).
+// Removing `storage` again switches the data volume back to an emptyDir but
+// leaves the PVC in place.
+type AlertmanagerStorageSpec struct {
+ // VolumeClaim describes the desired PersistentVolumeClaim. The
+ // embedded structure exposes both `metadata` (so callers can attach
+ // labels and annotations, e.g. for volume-snapshot tooling) and `spec`
+ // (so every Kubernetes PVC field — accessModes, storageClassName,
+ // resources, selector, volumeMode, dataSource, dataSourceRef — is
+ // configurable). The operator overwrites the claim's name and
+ // namespace; everything else is taken from the caller-provided spec
+ // modulo Kubernetes-enforced immutability.
+ VolumeClaim EmbeddedPersistentVolumeClaim `json:"volumeClaim"`
+}
+
+// EmbeddedPersistentVolumeClaim is a PersistentVolumeClaim definition
+// embedded directly in a parent resource's spec. It mirrors prometheus-
+// operator's type of the same name so user-facing YAML feels familiar.
+//
+// Only ObjectMeta fields that customise the claim itself (labels,
+// annotations, finalizers) are honoured; name and namespace are owned by
+// the operator.
+type EmbeddedPersistentVolumeClaim struct {
+ // TypeMeta is inlined so the embedded claim can carry `apiVersion` and
+ // `kind` like a standalone PersistentVolumeClaim manifest.
+ // NOTE(review): the reconciler code introduced alongside this type does
+ // not appear to read these two fields — confirm they are informational.
+ metav1.TypeMeta `json:",inline"`
+
+ // EmbeddedObjectMetadata contains labels, annotations and finalizers
+ // applied to the generated PersistentVolumeClaim. Other ObjectMeta
+ // fields are ignored.
+ // +optional
+ EmbeddedObjectMetadata `json:"metadata,omitempty"`
+
+ // Spec defines the desired characteristics of the volume. At minimum,
+ // `resources.requests.storage` must be set. See the Kubernetes
+ // PersistentVolumeClaim documentation for the full field reference:
+ // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#persistentvolumeclaimspec-v1-core
+ Spec corev1.PersistentVolumeClaimSpec `json:"spec,omitempty"`
+}
+
+// EmbeddedObjectMetadata is a subset of metav1.ObjectMeta containing only
+// the fields that make sense to set on an operator-managed child resource.
+// Setting `name` or `namespace` here has no effect — the operator owns
+// those.
+type EmbeddedObjectMetadata struct {
+ // Labels applied to the generated resource. Merged with the
+ // operator's default labels; on conflict the operator's value wins.
+ // Labels already present on the live resource are never removed, so
+ // deleting an entry here does not delete it from the live object.
+ // +optional
+ Labels map[string]string `json:"labels,omitempty"`
+ // Annotations applied to the generated resource. Useful for
+ // integrations such as VolumeSnapshot controllers or storage-class
+ // provisioners that read annotations from the claim. A value given
+ // here overrides the same key on the live resource; annotations
+ // already present on the live resource are never removed.
+ // +optional
+ Annotations map[string]string `json:"annotations,omitempty"`
+ // Finalizers applied to the generated resource on creation. The
+ // operator does not strip user-managed finalizers it did not add, so
+ // removing entries from this list does not remove them from the live
+ // object.
+ // +optional
+ Finalizers []string `json:"finalizers,omitempty"`
}
// AlertmanagerEndpoints defines a selection of a single Endpoints object
diff --git a/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go b/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go
index c6a5d372f2..d722b44836 100644
--- a/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go
+++ b/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go
@@ -661,6 +661,11 @@ func (in *ManagedAlertmanagerSpec) DeepCopyInto(out *ManagedAlertmanagerSpec) {
*out = new(corev1.SecretKeySelector)
(*in).DeepCopyInto(*out)
}
+ if in.Storage != nil {
+ in, out := &in.Storage, &out.Storage
+ *out = new(AlertmanagerStorageSpec)
+ (*in).DeepCopyInto(*out)
+ }
return
}
@@ -674,6 +679,77 @@ func (in *ManagedAlertmanagerSpec) DeepCopy() *ManagedAlertmanagerSpec {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *AlertmanagerStorageSpec) DeepCopyInto(out *AlertmanagerStorageSpec) {
+ // Shallow copy first; VolumeClaim is then deep-copied so that out does
+ // not share its reference-typed fields with in.
+ *out = *in
+ in.VolumeClaim.DeepCopyInto(&out.VolumeClaim)
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AlertmanagerStorageSpec.
+// A nil receiver yields nil.
+func (in *AlertmanagerStorageSpec) DeepCopy() *AlertmanagerStorageSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(AlertmanagerStorageSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddedPersistentVolumeClaim) DeepCopyInto(out *EmbeddedPersistentVolumeClaim) {
+ // Shallow copy first, then fix up the members individually: TypeMeta is
+ // assigned by value, while the embedded metadata and the claim spec are
+ // deep-copied through their own DeepCopyInto methods.
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.EmbeddedObjectMetadata.DeepCopyInto(&out.EmbeddedObjectMetadata)
+ in.Spec.DeepCopyInto(&out.Spec)
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddedPersistentVolumeClaim.
+// A nil receiver yields nil.
+func (in *EmbeddedPersistentVolumeClaim) DeepCopy() *EmbeddedPersistentVolumeClaim {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddedPersistentVolumeClaim)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddedObjectMetadata) DeepCopyInto(out *EmbeddedObjectMetadata) {
+ // The shallow copy leaves out's maps and slice aliasing in's storage;
+ // each non-nil one is replaced below by a freshly allocated copy. Nil
+ // fields stay nil.
+ *out = *in
+ if in.Labels != nil {
+ in, out := &in.Labels, &out.Labels
+ *out = make(map[string]string, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+ if in.Annotations != nil {
+ in, out := &in.Annotations, &out.Annotations
+ *out = make(map[string]string, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+ if in.Finalizers != nil {
+ in, out := &in.Finalizers, &out.Finalizers
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddedObjectMetadata.
+// A nil receiver yields nil.
+func (in *EmbeddedObjectMetadata) DeepCopy() *EmbeddedObjectMetadata {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddedObjectMetadata)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MonitoringCondition) DeepCopyInto(out *MonitoringCondition) {
*out = *in
diff --git a/pkg/operator/operator_config.go b/pkg/operator/operator_config.go
index 61417e62d8..f7a32994cf 100644
--- a/pkg/operator/operator_config.go
+++ b/pkg/operator/operator_config.go
@@ -38,6 +38,7 @@ import (
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
+ "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
@@ -512,7 +513,235 @@ func (r *operatorConfigReconciler) ensureAlertmanagerStatefulSet(ctx context.Con
logger.Error(err, "Alertmanager StatefulSet does not exist")
return nil
}
- return err
+ if err != nil {
+ return err
+ }
+
+ return r.reconcileAlertmanagerStorage(ctx, &sset, spec.Storage)
+}
+
+// alertmanagerDataVolumeName is the name of the volume backing Alertmanager's
+// --storage.path. Matches manifests/operator.yaml. Used as both the volume
+// name on the StatefulSet pod template and the PVC name in the operator
+// namespace when persistent storage is configured.
+const alertmanagerDataVolumeName = "alertmanager-data"
+
+// reconcileAlertmanagerStorage points the Alertmanager data volume on the
+// given StatefulSet at either an emptyDir or the operator-managed PVC,
+// depending on whether persistent storage is configured.
+//
+// With a non-nil spec, the PVC named "alertmanager-data" in the operator
+// namespace is created or updated first (see ensureAlertmanagerPVC) and the
+// pod template's data volume is then set to reference that claim. Any error
+// from the PVC step is returned wrapped and the StatefulSet is left
+// untouched.
+//
+// With a nil spec, the data volume is set back to an emptyDir so users can
+// disable persistence without editing the StatefulSet by hand. This function
+// never deletes the PVC: a claim left over from an earlier configuration
+// stays in place to avoid surprising data loss, and must be removed manually
+// to reclaim the storage.
+func (r *operatorConfigReconciler) reconcileAlertmanagerStorage(ctx context.Context, sset *appsv1.StatefulSet, spec *monitoringv1.AlertmanagerStorageSpec) error {
+	logger, _ := logr.FromContext(ctx)
+
+	// Both outcomes share the volume name; only the source differs.
+	dataVolume := corev1.Volume{Name: alertmanagerDataVolumeName}
+
+	if spec == nil {
+		dataVolume.EmptyDir = &corev1.EmptyDirVolumeSource{}
+		return r.setAlertmanagerDataVolume(ctx, sset, dataVolume)
+	}
+
+	// The claim must exist before the pod template starts referencing it.
+	if err := r.ensureAlertmanagerPVC(ctx, spec); err != nil {
+		return fmt.Errorf("ensure alertmanager PVC: %w", err)
+	}
+
+	logger.Info("alertmanager storage reconciled", "claim", alertmanagerDataVolumeName, "namespace", r.opts.OperatorNamespace)
+
+	dataVolume.PersistentVolumeClaim = &corev1.PersistentVolumeClaimVolumeSource{
+		ClaimName: alertmanagerDataVolumeName,
+	}
+	return r.setAlertmanagerDataVolume(ctx, sset, dataVolume)
+}
+
+// ensureAlertmanagerPVC creates or updates the PVC backing Alertmanager's
+// data directory.
+//
+// If the claim does not exist it is created from the caller-provided spec,
+// defaulting the access mode to ReadWriteOnce when none is given. The
+// operator's component labels are merged over the caller's labels, so the
+// operator's value wins on conflict.
+//
+// If the claim already exists, only the following are reconciled:
+//   - labels and annotations: caller-supplied entries are added or updated
+//     (operator labels still win); entries already on the live claim are
+//     never removed;
+//   - `resources.requests.storage`: raised when the caller asks for more
+//     than the live claim requests. A smaller request is logged and
+//     ignored. A spec without a storage request leaves the live value
+//     untouched.
+//
+// All other spec fields (access modes, storage class, selector, ...) are
+// left as they are on the live claim, because Kubernetes treats most PVC
+// spec fields as immutable after creation. Finalizers are applied only on
+// creation.
+//
+// It returns any error from the get, create or patch call; a nil error
+// means the claim exists and needed no change, or was changed successfully.
+func (r *operatorConfigReconciler) ensureAlertmanagerPVC(ctx context.Context, spec *monitoringv1.AlertmanagerStorageSpec) error {
+	logger, _ := logr.FromContext(ctx)
+
+	desiredSpec := spec.VolumeClaim.Spec.DeepCopy()
+	if len(desiredSpec.AccessModes) == 0 {
+		desiredSpec.AccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}
+	}
+
+	pvcKey := client.ObjectKey{Namespace: r.opts.OperatorNamespace, Name: alertmanagerDataVolumeName}
+	var existing corev1.PersistentVolumeClaim
+	err := r.client.Get(ctx, pvcKey, &existing)
+	if apierrors.IsNotFound(err) {
+		pvc := corev1.PersistentVolumeClaim{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: pvcKey.Namespace,
+				Name:      pvcKey.Name,
+				// Operator-owned labels go last so they override any
+				// conflicting user label, exactly as on the update path.
+				Labels:      mergeLabels(spec.VolumeClaim.Labels, componentLabels(NameAlertmanager)),
+				Annotations: copyMap(spec.VolumeClaim.Annotations),
+				// Copy so the created object never aliases the caller's slice.
+				Finalizers: append([]string(nil), spec.VolumeClaim.Finalizers...),
+			},
+			Spec: *desiredSpec,
+		}
+		return r.client.Create(ctx, &pvc)
+	}
+	if err != nil {
+		return err
+	}
+
+	patch := existing.DeepCopy()
+	mutated := false
+
+	// Reconcile mutable metadata. Operator-owned label keys always win;
+	// every other user-supplied label/annotation is propagated. Keys the
+	// user previously set and has now removed are not deleted — doing so
+	// safely would require tracking which keys the operator owns.
+	if labels := mergeLabels(existing.Labels, spec.VolumeClaim.Labels, componentLabels(NameAlertmanager)); !mapsEqual(labels, existing.Labels) {
+		patch.Labels = labels
+		mutated = true
+	}
+	if anns := mergeLabels(existing.Annotations, spec.VolumeClaim.Annotations); !mapsEqual(anns, existing.Annotations) {
+		patch.Annotations = anns
+		mutated = true
+	}
+
+	// Only the storage request is reconciled on an existing claim. Drift in
+	// any other spec field (access modes, storage class, selector, ...) is
+	// deliberately not sent, since the API server would reject the update.
+	//
+	// The comparison is skipped when the caller gave no storage request:
+	// the missing key would otherwise read as a zero quantity and be
+	// reported as a shrink on every reconciliation.
+	wantStorage, requested := desiredSpec.Resources.Requests[corev1.ResourceStorage]
+	gotStorage := existing.Spec.Resources.Requests[corev1.ResourceStorage]
+	if requested {
+		switch wantStorage.Cmp(gotStorage) {
+		case 1:
+			if patch.Spec.Resources.Requests == nil {
+				patch.Spec.Resources.Requests = corev1.ResourceList{}
+			}
+			patch.Spec.Resources.Requests[corev1.ResourceStorage] = wantStorage
+			mutated = true
+		case -1:
+			logger.Info("ignoring requested PVC shrink; Kubernetes does not support PVC shrinking",
+				"have", gotStorage.String(), "want", wantStorage.String())
+		}
+	}
+
+	if !mutated {
+		return nil
+	}
+	return r.client.Patch(ctx, patch, client.MergeFrom(&existing))
+}
+
+// componentLabels builds the label set the operator stamps on resources it
+// manages for the given component: a single entry mapping LabelAppName to
+// the component name. A fresh map is returned on every call, so callers may
+// modify the result freely.
+func componentLabels(component string) map[string]string {
+	labels := make(map[string]string, 1)
+	labels[LabelAppName] = component
+	return labels
+}
+
+// mergeLabels combines any number of string maps into a newly allocated
+// map. When the same key occurs in several inputs, the value from the
+// right-most input wins, so operator-owned labels should be passed last to
+// override conflicting user-supplied values. Nil inputs are allowed. The
+// result is nil when the inputs contain no entries at all.
+func mergeLabels(in ...map[string]string) map[string]string {
+	// Upper bound on the result size; also tells us whether there is
+	// anything to merge.
+	total := 0
+	for _, src := range in {
+		total += len(src)
+	}
+	if total == 0 {
+		return nil
+	}
+
+	merged := make(map[string]string, total)
+	for _, src := range in {
+		maps.Copy(merged, src)
+	}
+	return merged
+}
+
+// copyMap returns an independent copy of m, so that later changes to either
+// map do not affect the other. A nil input yields nil; an empty non-nil
+// input yields an empty non-nil map.
+func copyMap(m map[string]string) map[string]string {
+	return maps.Clone(m)
+}
+
+// mapsEqual reports whether a and b contain exactly the same key/value
+// pairs. A nil map and an empty map are considered equal.
+//
+// Key presence is compared as well as values: a key mapped to the empty
+// string in one map is not matched by a missing key in the other, so
+// {"x": ""} and {"y": ""} are different.
+func mapsEqual(a, b map[string]string) bool {
+	return maps.Equal(a, b)
+}
+
+// setAlertmanagerDataVolume makes the volume named desired.Name on the
+// Alertmanager pod template equal to desired.
+//
+// The first volume with a matching name is compared via volumeSourcesEqual;
+// if it already matches, nothing is sent to the API server, so steady-state
+// reconciliations don't churn the StatefulSet. Otherwise the volume is
+// replaced, or appended when no volume of that name exists (which should not
+// happen with the shipped manifest, but keeps the operator self-healing if
+// someone strips it). sset is modified in place in both cases.
+//
+// The change is sent as a patch computed against a snapshot of sset taken
+// before the modification, so only the fields this function touched are
+// included rather than the whole object. Note that client.MergeFrom yields
+// a JSON merge patch, not a strategic-merge patch: JSON merge patches
+// replace lists wholesale, so the patch carries the complete volumes list.
+// No optimistic-lock option is passed, so the patch is not guarded by the
+// object's resourceVersion and a concurrent change to the volumes list made
+// after sset was read can be overwritten.
+func (r *operatorConfigReconciler) setAlertmanagerDataVolume(ctx context.Context, sset *appsv1.StatefulSet, desired corev1.Volume) error {
+	volumes := sset.Spec.Template.Spec.Volumes
+
+	// Locate the first volume carrying the desired name, if any.
+	idx := -1
+	for i := range volumes {
+		if volumes[i].Name == desired.Name {
+			idx = i
+			break
+		}
+	}
+	if idx >= 0 && volumeSourcesEqual(volumes[idx].VolumeSource, desired.VolumeSource) {
+		return nil
+	}
+
+	// Snapshot only once a change is certain, and before mutating sset, so
+	// the patch contains exactly the volume change.
+	original := sset.DeepCopy()
+	if idx >= 0 {
+		sset.Spec.Template.Spec.Volumes[idx] = desired
+	} else {
+		sset.Spec.Template.Spec.Volumes = append(sset.Spec.Template.Spec.Volumes, desired)
+	}
+	return r.client.Patch(ctx, sset, client.MergeFrom(original))
+}
+
+// volumeSourcesEqual reports whether a and b describe the same storage, for
+// the two source kinds the operator uses for the Alertmanager data volume.
+//
+//   - Both emptyDir: equal when `medium` and `sizeLimit` match.
+//   - Both persistentVolumeClaim: equal when `claimName` and `readOnly`
+//     match.
+//
+// Every other combination — differing kinds, or kinds other than these two —
+// is reported as not equal. Comparing the individual fields, not just the
+// kind, means a manually edited `medium`, `sizeLimit` or `readOnly` counts
+// as drift and is reconciled back rather than silently preserved. If both
+// values have emptyDir set, the emptyDir comparison takes precedence.
+func volumeSourcesEqual(a, b corev1.VolumeSource) bool {
+	if ea, eb := a.EmptyDir, b.EmptyDir; ea != nil && eb != nil {
+		return ea.Medium == eb.Medium &&
+			resourceQuantityPtrEqual(ea.SizeLimit, eb.SizeLimit)
+	}
+	if pa, pb := a.PersistentVolumeClaim, b.PersistentVolumeClaim; pa != nil && pb != nil {
+		return pa.ClaimName == pb.ClaimName && pa.ReadOnly == pb.ReadOnly
+	}
+	return false
+}
+
+// resourceQuantityPtrEqual reports whether two *resource.Quantity values are
+// equivalent: both nil, or both non-nil and comparing equal via Cmp. A nil
+// pointer never equals a non-nil one, even if the latter holds zero.
+func resourceQuantityPtrEqual(a, b *resource.Quantity) bool {
+	if a != nil && b != nil {
+		return a.Cmp(*b) == 0
+	}
+	// At least one side is nil; they match only if both are.
+	return a == b
}
// ensureRuleEvaluatorDeployment reconciles the Deployment for rule-evaluator.
diff --git a/pkg/operator/operator_config_test.go b/pkg/operator/operator_config_test.go
index 7627e251ac..0f69bedfb7 100644
--- a/pkg/operator/operator_config_test.go
+++ b/pkg/operator/operator_config_test.go
@@ -15,6 +15,7 @@
package operator
import (
+ "fmt"
"testing"
monitoringv1 "github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator/apis/monitoring/v1"
@@ -25,7 +26,10 @@ import (
"github.com/prometheus/prometheus/google/export"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
+ appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ "k8s.io/apimachinery/pkg/api/resource"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -433,3 +437,179 @@ route:
})
}
}
+
+// TestEnsureAlertmanagerStatefulSet_Storage runs ensureAlertmanagerStatefulSet
+// against a fake client for each storage configuration of the managed
+// Alertmanager and checks the resulting StatefulSet data volume and PVC:
+//
+//   - no storage spec: the data volume stays an emptyDir and no PVC exists;
+//   - storage spec set: a PVC is created and the data volume references it;
+//   - request larger than the existing PVC: the PVC request grows;
+//   - request smaller than the existing PVC: the PVC request is unchanged;
+//   - storage spec removed: the data volume is an emptyDir again and the PVC
+//     is still present.
+func TestEnsureAlertmanagerStatefulSet_Storage(t *testing.T) {
+	// opts is shared by every subtest's reconciler.
+	opts := Options{
+		ProjectID:         "test-project",
+		Location:          "us-central1-c",
+		Cluster:           "test-cluster",
+		PublicNamespace:   DefaultPublicNamespace,
+		OperatorNamespace: DefaultOperatorNamespace,
+	}
+	// pvcKey identifies the claim these tests expect the reconciler to manage.
+	pvcKey := client.ObjectKey{
+		Namespace: DefaultOperatorNamespace,
+		Name:      alertmanagerDataVolumeName,
+	}
+
+	// ssetWithDataVolume returns an Alertmanager StatefulSet whose only volume
+	// is the data volume, backed by src.
+	ssetWithDataVolume := func(src corev1.VolumeSource) *appsv1.StatefulSet {
+		sset := &appsv1.StatefulSet{}
+		sset.Namespace, sset.Name = DefaultOperatorNamespace, NameAlertmanager
+		sset.Spec.Template.Spec.Volumes = []corev1.Volume{
+			{Name: alertmanagerDataVolumeName, VolumeSource: src},
+		}
+		return sset
+	}
+	// emptyDirSset returns a StatefulSet whose data volume is an emptyDir.
+	emptyDirSset := func() *appsv1.StatefulSet {
+		return ssetWithDataVolume(corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}})
+	}
+
+	// claimSpec returns a PVC spec that only requests gib GiB of storage.
+	claimSpec := func(gib int) corev1.PersistentVolumeClaimSpec {
+		var spec corev1.PersistentVolumeClaimSpec
+		spec.Resources.Requests = corev1.ResourceList{
+			corev1.ResourceStorage: resource.MustParse(fmt.Sprintf("%dGi", gib)),
+		}
+		return spec
+	}
+	// existingClaim returns a PVC named by pvcKey requesting gib GiB with the
+	// given access modes (none when omitted), for seeding the fake client.
+	existingClaim := func(gib int, modes ...corev1.PersistentVolumeAccessMode) *corev1.PersistentVolumeClaim {
+		pvc := &corev1.PersistentVolumeClaim{Spec: claimSpec(gib)}
+		pvc.Namespace, pvc.Name = pvcKey.Namespace, pvcKey.Name
+		pvc.Spec.AccessModes = modes
+		return pvc
+	}
+	// storageOf wraps claim into a ManagedAlertmanagerSpec with storage set.
+	storageOf := func(claim monitoringv1.EmbeddedPersistentVolumeClaim) *monitoringv1.ManagedAlertmanagerSpec {
+		return &monitoringv1.ManagedAlertmanagerSpec{
+			Storage: &monitoringv1.AlertmanagerStorageSpec{VolumeClaim: claim},
+		}
+	}
+
+	// With no storage spec the reconciler must keep the emptyDir data volume
+	// and must not create a PVC.
+	t.Run("nil storage leaves emptyDir intact", func(t *testing.T) {
+		ctx := t.Context()
+		sset := emptyDirSset()
+		kubeClient := newFakeClientBuilder().WithObjects(sset).Build()
+		reconciler := newOperatorConfigReconciler(kubeClient, opts)
+
+		err := reconciler.ensureAlertmanagerStatefulSet(ctx, &monitoringv1.ManagedAlertmanagerSpec{})
+		require.NoError(t, err)
+
+		var got appsv1.StatefulSet
+		require.NoError(t, kubeClient.Get(ctx, client.ObjectKeyFromObject(sset), &got))
+		dataVolume := got.Spec.Template.Spec.Volumes[0]
+		require.NotNil(t, dataVolume.EmptyDir, "emptyDir volume must be preserved when no storage spec is set")
+		require.Nil(t, dataVolume.PersistentVolumeClaim)
+
+		// The reconciler must not have created a claim.
+		var pvc corev1.PersistentVolumeClaim
+		err = kubeClient.Get(ctx, pvcKey, &pvc)
+		require.True(t, apierrors.IsNotFound(err), "PVC must not exist when storage is unset; got err=%v", err)
+	})
+
+	// With a storage spec the reconciler must create the PVC, carrying over
+	// caller metadata, and point the data volume at it.
+	t.Run("storage set provisions PVC and swaps volume to PVC reference", func(t *testing.T) {
+		ctx := t.Context()
+		sset := emptyDirSset()
+		kubeClient := newFakeClientBuilder().WithObjects(sset).Build()
+		reconciler := newOperatorConfigReconciler(kubeClient, opts)
+
+		claim := monitoringv1.EmbeddedPersistentVolumeClaim{Spec: claimSpec(5)}
+		claim.EmbeddedObjectMetadata.Labels = map[string]string{"team": "platform"}
+		claim.EmbeddedObjectMetadata.Annotations = map[string]string{"backup.example.com/enabled": "true"}
+		require.NoError(t, reconciler.ensureAlertmanagerStatefulSet(ctx, storageOf(claim)))
+
+		// The claim carries caller metadata, the operator label and a default access mode.
+		var pvc corev1.PersistentVolumeClaim
+		require.NoError(t, kubeClient.Get(ctx, pvcKey, &pvc))
+		wantModes := []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}
+		require.Equal(t, wantModes, pvc.Spec.AccessModes, "access mode must default to ReadWriteOnce when caller omits it")
+		require.Equal(t, "5Gi", pvc.Spec.Resources.Requests.Storage().String())
+		require.Equal(t, "platform", pvc.Labels["team"])
+		require.Equal(t, NameAlertmanager, pvc.Labels[LabelAppName], "operator-owned label must be set")
+		require.Equal(t, "true", pvc.Annotations["backup.example.com/enabled"])
+
+		// The StatefulSet mounts the claim in place of the emptyDir.
+		var got appsv1.StatefulSet
+		require.NoError(t, kubeClient.Get(ctx, client.ObjectKeyFromObject(sset), &got))
+		dataVolume := got.Spec.Template.Spec.Volumes[0]
+		require.NotNil(t, dataVolume.PersistentVolumeClaim, "data volume must now reference the PVC")
+		require.Equal(t, alertmanagerDataVolumeName, dataVolume.PersistentVolumeClaim.ClaimName)
+		require.Nil(t, dataVolume.EmptyDir)
+	})
+
+	// A spec requesting more storage than the existing PVC has must raise the
+	// PVC's storage request.
+	t.Run("expanding storage request patches the PVC", func(t *testing.T) {
+		ctx := t.Context()
+		// Seed a claim already sized at 5Gi to simulate a steady-state cluster.
+		current := existingClaim(5, corev1.ReadWriteOnce)
+		kubeClient := newFakeClientBuilder().WithObjects(emptyDirSset(), current).Build()
+		reconciler := newOperatorConfigReconciler(kubeClient, opts)
+
+		spec := storageOf(monitoringv1.EmbeddedPersistentVolumeClaim{Spec: claimSpec(10)})
+		require.NoError(t, reconciler.ensureAlertmanagerStatefulSet(ctx, spec))
+
+		var pvc corev1.PersistentVolumeClaim
+		require.NoError(t, kubeClient.Get(ctx, pvcKey, &pvc))
+		require.Equal(t, "10Gi", pvc.Spec.Resources.Requests.Storage().String(), "PVC must be expanded to match requested size")
+	})
+
+	// A spec requesting less storage than the existing PVC has must leave the
+	// PVC's storage request untouched.
+	t.Run("shrink request is ignored", func(t *testing.T) {
+		ctx := t.Context()
+		// The seeded claim is larger than what the spec asks for.
+		current := existingClaim(10, corev1.ReadWriteOnce)
+		kubeClient := newFakeClientBuilder().WithObjects(emptyDirSset(), current).Build()
+		reconciler := newOperatorConfigReconciler(kubeClient, opts)
+
+		spec := storageOf(monitoringv1.EmbeddedPersistentVolumeClaim{Spec: claimSpec(2)})
+		require.NoError(t, reconciler.ensureAlertmanagerStatefulSet(ctx, spec))
+
+		var pvc corev1.PersistentVolumeClaim
+		require.NoError(t, kubeClient.Get(ctx, pvcKey, &pvc))
+		require.Equal(t, "10Gi", pvc.Spec.Resources.Requests.Storage().String(), "PVC must not shrink; Kubernetes does not allow this")
+	})
+
+	// Dropping the storage spec must switch the data volume back to an emptyDir
+	// without deleting the PVC.
+	t.Run("removing storage spec falls back to emptyDir and leaves PVC in place", func(t *testing.T) {
+		ctx := t.Context()
+		// The StatefulSet starts out already mounting the claim.
+		sset := ssetWithDataVolume(corev1.VolumeSource{
+			PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ClaimName: alertmanagerDataVolumeName},
+		})
+		kubeClient := newFakeClientBuilder().WithObjects(sset, existingClaim(5)).Build()
+		reconciler := newOperatorConfigReconciler(kubeClient, opts)
+
+		err := reconciler.ensureAlertmanagerStatefulSet(ctx, &monitoringv1.ManagedAlertmanagerSpec{})
+		require.NoError(t, err)
+
+		var got appsv1.StatefulSet
+		require.NoError(t, kubeClient.Get(ctx, client.ObjectKeyFromObject(sset), &got))
+		require.NotNil(t, got.Spec.Template.Spec.Volumes[0].EmptyDir, "removing storage spec must restore emptyDir")
+
+		// The claim itself must still be retrievable.
+		var pvc corev1.PersistentVolumeClaim
+		require.NoError(t, kubeClient.Get(ctx, pvcKey, &pvc), "PVC must remain so silences survive accidental config removal")
+	})
+}