
Commit 5499734

Fix hash evaluation
If hash evaluation fails because a referenced resource is missing, continue by returning nil instead of failing. This makes sure the error is reported in the ClusterSummary status.

This PR also changes the logic around `scope.Close()` in the ClusterSummary controller. When a conflict is detected in `scope.Close()`, instead of returning nothing, set `result = ctrl.Result{RequeueAfter: time.Minute}`. This ensures re-reconciliation without bypassing the NextReconcileTime backoff: on the success path setNextReconcileTime is not called, so skipReconciliation will not block the 1-minute requeue.

This fixes the test failure in the tier-change scenario:

1. Tier changes → ClusterSummary spec updated → reconcile fires
2. Deployment succeeds (no setNextReconcileTime called on the success path → no cooldown set)
3. scope.Close() tries to patch status → conflict (the ClusterSummary was modified between read and patch by the controller's own in-flight logic)
4. Old behavior (pre PR): conflict swallowed → no requeue scheduled, no watch event arrives → status never reaches Provisioned → test times out
1 parent 7e63ea1 commit 5499734
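A minimal, self-contained sketch of the conflict handling described above; `Result` and `closeScope` are simplified stand-ins for controller-runtime's `ctrl.Result` and the real `clusterSummaryScope.Close`, so this is an illustration of the pattern, not the controller code itself:

```go
package main

import (
	"fmt"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// Result mimics ctrl.Result for this sketch.
type Result struct {
	RequeueAfter time.Duration
}

// closeScope stands in for clusterSummaryScope.Close; here it always reports
// an update conflict, as in the failing tier-change test.
func closeScope() error {
	return apierrors.NewConflict(
		schema.GroupResource{Group: "config.projectsveltos.io", Resource: "clustersummaries"},
		"demo", fmt.Errorf("the object has been modified"))
}

func reconcile() (result Result, reterr error) {
	if err := closeScope(); err != nil {
		if apierrors.IsConflict(err) {
			// Swallow the conflict and ask for a 1-minute requeue instead of
			// returning the error (which would requeue immediately).
			result = Result{RequeueAfter: time.Minute}
			return result, nil
		}
		reterr = err
	}
	return result, reterr
}

func main() {
	res, err := reconcile()
	fmt.Println(res.RequeueAfter, err) // 1m0s <nil>
}
```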

11 files changed

Lines changed: 148 additions & 60 deletions

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default.

cmd/main.go

Lines changed: 2 additions & 7 deletions
@@ -781,16 +781,11 @@ func runInitContainerWork(ctx context.Context, config *rest.Config,

 func setupLogging() {
 	klog.InitFlags(nil)
-	_ = flag.Set("logtostderr", "false") // set default, but still overridable via CLI
+
 	initFlags(pflag.CommandLine)
 	pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
 	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
 	pflag.Parse()

-	if flag.Lookup("logtostderr").Value.String() == "false" {
-		klog.SetOutputBySeverity("INFO", os.Stdout)
-		klog.SetOutputBySeverity("WARNING", os.Stdout)
-		klog.SetOutputBySeverity("ERROR", os.Stderr)
-		klog.SetOutputBySeverity("FATAL", os.Stderr)
-	}
+	ctrl.SetLogger(klog.Background())
 }

controllers/clustersummary_controller.go

Lines changed: 14 additions & 6 deletions
@@ -155,7 +155,7 @@ type ClusterSummaryReconciler struct {
 //+kubebuilder:rbac:groups="source.toolkit.fluxcd.io",resources=buckets,verbs=get;watch;list
 //+kubebuilder:rbac:groups="source.toolkit.fluxcd.io",resources=buckets/status,verbs=get;watch;list

-func (r *ClusterSummaryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
+func (r *ClusterSummaryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, reterr error) {
 	logger := ctrl.LoggerFrom(ctx)
 	logger.V(logs.LogDebug).Info("Reconciling")

@@ -210,14 +210,17 @@ func (r *ClusterSummaryReconciler) Reconcile(ctx context.Context, req ctrl.Reque
 	}

 	// Always close the scope when exiting this function so we can persist any ClusterSummary
-	// changes. Conflict errors are swallowed because the watch event from whatever caused the
-	// conflict will re-enqueue this resource, and the next reconciliation will recompute status.
-	// Propagating the conflict would cause controller-runtime to immediately requeue, bypassing
-	// the intended NextReconcileTime backoff.
+	// changes. Conflict errors are swallowed and a 1-minute requeue is scheduled instead of
+	// propagating the error (which would cause controller-runtime to immediately requeue,
+	// bypassing the intended NextReconcileTime backoff). We cannot rely solely on a watch event
+	// to re-enqueue because the conflict may be caused by the controller's own logic (e.g. a
+	// tier change), in which case no further watch event will arrive.
 	defer func() {
 		if err = clusterSummaryScope.Close(ctx); err != nil {
 			if apierrors.IsConflict(err) {
-				logger.V(logs.LogDebug).Info("conflict patching ClusterSummary status, will reconcile on next event")
+				logger.V(logs.LogDebug).Info("conflict patching ClusterSummary status, will retry in 1 minute")
+				r.setNextReconcileTime(clusterSummaryScope, time.Minute)
+				result = ctrl.Result{RequeueAfter: time.Minute}
 				return
 			}
 			reterr = err
@@ -295,6 +298,11 @@ func (r *ClusterSummaryReconciler) updateDeletedInstancs(clusterSummaryScope *sc
 		Namespace: clusterSummaryScope.Namespace(),
 		Name:      clusterSummaryScope.Name(),
 	}] = time.Now()
+
+	delete(r.NextReconcileTimes, types.NamespacedName{
+		Namespace: clusterSummaryScope.Namespace(),
+		Name:      clusterSummaryScope.Name(),
+	})
 }

 func (r *ClusterSummaryReconciler) reconcileDelete(
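This is also why the diff renames Reconcile's first return value from `_` to `result`: only a named return value can be modified from inside a deferred function. A tiny standalone sketch of that mechanism (types simplified, not the controller code):

```go
package main

import (
	"fmt"
	"time"
)

// Result is a stand-in for ctrl.Result.
type Result struct {
	RequeueAfter time.Duration
}

// reconcile shows the mechanism: with the old "_ ctrl.Result" signature there
// is no name the deferred function could assign to, so the requeue request
// would never reach the caller.
func reconcile(conflict bool) (result Result, reterr error) {
	defer func() {
		if conflict {
			result = Result{RequeueAfter: time.Minute}
		}
	}()
	return Result{}, nil
}

func main() {
	res, _ := reconcile(true)
	fmt.Println(res.RequeueAfter) // 1m0s: the deferred assignment reached the caller
}
```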

controllers/clustersummary_deployer.go

Lines changed: 2 additions & 1 deletion
@@ -98,7 +98,8 @@ func (r *ClusterSummaryReconciler) deployFeature(ctx context.Context, clusterSum
 	// Get hash of current configuration (at this very precise moment)
 	currentHash, err := f.currentHash(ctx, r.Client, clusterSummary, logger)
 	if err != nil {
-		if !apierrors.IsNotFound(err) {
+		var nrErr *configv1beta1.NonRetriableError
+		if !errors.As(err, &nrErr) && !apierrors.IsNotFound(err) {
 			return err
 		}
 	}
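A hedged sketch of the new `errors.As` check; the `NonRetriableError` shape below is assumed for illustration (the real type lives in the config API package), but the unwrapping logic is the same:

```go
package main

import (
	"errors"
	"fmt"
)

// NonRetriableError is assumed here to be a simple error wrapper; the real
// definition in api/v1beta1 may differ.
type NonRetriableError struct {
	Message string
}

func (e *NonRetriableError) Error() string { return e.Message }

// currentHash simulates a hash evaluation that hit a missing referenced
// resource and returned a wrapped non-retriable error.
func currentHash() ([]byte, error) {
	return nil, fmt.Errorf("hash evaluation: %w", &NonRetriableError{Message: "referenced ConfigMap not found"})
}

func main() {
	_, err := currentHash()
	if err != nil {
		var nrErr *NonRetriableError
		if !errors.As(err, &nrErr) {
			// Retriable: propagate, as deployFeature does.
			fmt.Println("propagating:", err)
			return
		}
		// Non-retriable: keep going so the failure ends up in the
		// ClusterSummary status instead of aborting the whole call.
		fmt.Println("continuing despite:", nrErr.Message)
	}
}
```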

controllers/controllers_suite_test.go

Lines changed: 5 additions & 5 deletions
@@ -171,11 +171,11 @@ var _ = AfterSuite(func() {

 func getClusterSummaryReconciler(c client.Client, dep deployer.DeployerInterface) *controllers.ClusterSummaryReconciler {
 	return &controllers.ClusterSummaryReconciler{
-		Client:       c,
-		Scheme:       scheme,
-		Deployer:     dep,
-		ClusterMap:   make(map[corev1.ObjectReference]*libsveltosset.Set),
-		ReferenceMap: make(map[corev1.ObjectReference]*libsveltosset.Set),
+		Client:             c,
+		Scheme:             scheme,
+		Deployer:           dep,
+		ClusterMap:         make(map[corev1.ObjectReference]*libsveltosset.Set),
+		ReferenceMap:       make(map[corev1.ObjectReference]*libsveltosset.Set),
 		DeletedInstances:   make(map[types.NamespacedName]time.Time),
 		NextReconcileTimes: make(map[types.NamespacedName]time.Time),
 		PolicyMux:          sync.Mutex{},

controllers/handlers_kustomize.go

Lines changed: 1 addition & 0 deletions
@@ -367,6 +367,7 @@ func kustomizationHash(ctx context.Context, c client.Client, clusterSummary *con
 			continue
 		}
 		config += string(result)
+		config += fmt.Sprintf("%d", kustomizationRef.Tier)

 		valueFromHash, err := getKustomizeReferenceResourceHash(ctx, c, clusterSummary,
 			kustomizationRef, logger)
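The intent of appending the tier to the hash input, sketched with illustrative names: the same rendered content with a different tier now yields a different hash, so a tier-only change re-triggers deployment.

```go
package main

import (
	"crypto/sha256"
	"fmt"
)

// configHash mirrors the idea in the diff: the rendered content plus the tier
// feed the hash, so changing only the tier produces a new hash.
func configHash(rendered string, tier int32) [32]byte {
	config := rendered
	config += fmt.Sprintf("%d", tier)
	return sha256.Sum256([]byte(config))
}

func main() {
	before := configHash("apiVersion: v1\nkind: ConfigMap", 100)
	after := configHash("apiVersion: v1\nkind: ConfigMap", 90)
	fmt.Println(before != after) // true: same content, different tier, different hash
}
```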

controllers/handlers_resources.go

Lines changed: 43 additions & 28 deletions
@@ -533,34 +533,7 @@ func resourcesHash(ctx context.Context, c client.Client, clusterSummary *configv
 	hash := sha256.Sum256(raw)
 	config += hex.EncodeToString((hash[:]))

-	referencedObjects := make([]corev1.ObjectReference, 0, len(clusterSummary.Spec.ClusterProfileSpec.PolicyRefs))
-	for i := range sortedPolicyRefs {
-		reference := &sortedPolicyRefs[i]
-		namespace, err := libsveltostemplate.GetReferenceResourceNamespace(ctx, c,
-			clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, reference.Namespace,
-			clusterSummary.Spec.ClusterType)
-		if err != nil {
-			logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to instantiate namespace for %s %s/%s: %v",
-				reference.Kind, reference.Namespace, reference.Name, err))
-			// Ignore template instantiation error
-			continue
-		}
-
-		name, err := libsveltostemplate.GetReferenceResourceName(ctx, c, clusterSummary.Spec.ClusterNamespace,
-			clusterSummary.Spec.ClusterName, reference.Name, clusterSummary.Spec.ClusterType)
-		if err != nil {
-			logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to instantiate name for %s %s/%s: %v",
-				reference.Kind, reference.Namespace, reference.Name, err))
-			// Ignore template instantiation error
-			continue
-		}
-
-		referencedObjects = append(referencedObjects, corev1.ObjectReference{
-			Kind:      sortedPolicyRefs[i].Kind,
-			Namespace: namespace,
-			Name:      name,
-		})
-	}
+	referencedObjects, referencedObjectTiers := getInstantiatedPolicyRefInfo(ctx, c, clusterSummary, sortedPolicyRefs, logger)

 	sort.Sort(dependencymanager.SortedCorev1ObjectReference(referencedObjects))

@@ -573,12 +546,14 @@ func resourcesHash(ctx context.Context, c client.Client, clusterSummary *configv
 			err = c.Get(ctx, types.NamespacedName{Namespace: reference.Namespace, Name: reference.Name}, configmap)
 			if err == nil {
 				config += getConfigMapHash(configmap)
+				config += fmt.Sprintf("%d", referencedObjectTiers[*reference])
 			}
 		} else if reference.Kind == string(libsveltosv1beta1.SecretReferencedResourceKind) {
 			secret := &corev1.Secret{}
 			err = c.Get(ctx, types.NamespacedName{Namespace: reference.Namespace, Name: reference.Name}, secret)
 			if err == nil {
 				config += getSecretHash(secret)
+				config += fmt.Sprintf("%d", referencedObjectTiers[*reference])
 			}
 		} else {
 			var source client.Object
@@ -591,6 +566,7 @@ func resourcesHash(ctx context.Context, c client.Client, clusterSummary *configv
 			if source.GetAnnotations() != nil {
 				config += getDataSectionHash(source.GetAnnotations())
 			}
+			config += fmt.Sprintf("%d", referencedObjectTiers[*reference])
 		}
 	}
 	if err != nil {
@@ -623,6 +599,45 @@ func resourcesHash(ctx context.Context, c client.Client, clusterSummary *configv
 	return h.Sum(nil), nil
 }

+func getInstantiatedPolicyRefInfo(ctx context.Context, c client.Client, clusterSummary *configv1beta1.ClusterSummary,
+	sortedPolicyRefs []configv1beta1.PolicyRef, logger logr.Logger,
+) (referencedObjects []corev1.ObjectReference, referencedObjectTiers map[corev1.ObjectReference]int32) {
+
+	referencedObjects = make([]corev1.ObjectReference, 0, len(clusterSummary.Spec.ClusterProfileSpec.PolicyRefs))
+	referencedObjectTiers = make(map[corev1.ObjectReference]int32, len(clusterSummary.Spec.ClusterProfileSpec.PolicyRefs))
+	for i := range sortedPolicyRefs {
+		reference := &sortedPolicyRefs[i]
+		namespace, err := libsveltostemplate.GetReferenceResourceNamespace(ctx, c,
+			clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, reference.Namespace,
+			clusterSummary.Spec.ClusterType)
+		if err != nil {
+			logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to instantiate namespace for %s %s/%s: %v",
+				reference.Kind, reference.Namespace, reference.Name, err))
+			// Ignore template instantiation error
+			continue
+		}
+
+		name, err := libsveltostemplate.GetReferenceResourceName(ctx, c, clusterSummary.Spec.ClusterNamespace,
+			clusterSummary.Spec.ClusterName, reference.Name, clusterSummary.Spec.ClusterType)
+		if err != nil {
+			logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to instantiate name for %s %s/%s: %v",
+				reference.Kind, reference.Namespace, reference.Name, err))
+			// Ignore template instantiation error
+			continue
+		}
+
+		obj := corev1.ObjectReference{
+			Kind:      sortedPolicyRefs[i].Kind,
+			Namespace: namespace,
+			Name:      name,
+		}
+		referencedObjects = append(referencedObjects, obj)
+		referencedObjectTiers[obj] = reference.Tier
+	}
+
+	return referencedObjects, referencedObjectTiers
+}
+
 func getResourceRefs(clusterSummary *configv1beta1.ClusterSummary) []configv1beta1.PolicyRef {
 	return clusterSummary.Spec.ClusterProfileSpec.PolicyRefs
 }
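The new helper keys a map by `corev1.ObjectReference`, which works because all of that struct's fields are comparable. A trimmed stand-in type shows the store-then-lookup pattern the hash loop relies on:

```go
package main

import "fmt"

// ObjectReference is a trimmed stand-in for corev1.ObjectReference; its
// comparable fields are what allow it to be used as a map key.
type ObjectReference struct {
	Kind, Namespace, Name string
}

func main() {
	tiers := map[ObjectReference]int32{}

	// getInstantiatedPolicyRefInfo stores the tier under the instantiated reference...
	ref := ObjectReference{Kind: "ConfigMap", Namespace: "default", Name: "first"}
	tiers[ref] = 90

	// ...and resourcesHash later rebuilds an equal key and finds the same entry.
	lookup := ObjectReference{Kind: "ConfigMap", Namespace: "default", Name: "first"}
	fmt.Println(tiers[lookup]) // 90
}
```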

controllers/profile_utils.go

Lines changed: 1 addition & 1 deletion
@@ -398,7 +398,7 @@ func cleanClusterConfigurations(ctx context.Context, c client.Client, profileSco
 		}

 		err = cleanClusterConfiguration(ctx, c, profileScope.Profile, cc)
-		if err != nil {
+		if err != nil && !apierrors.IsNotFound(err) {
 			return err
 		}
 	}
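A small sketch of the relaxed guard: treating NotFound as success makes the cleanup loop idempotent when the referenced object is already gone (the group, resource, and name below are illustrative):

```go
package main

import (
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// cleanup simulates cleanClusterConfiguration returning NotFound because the
// ClusterConfiguration was already deleted; the guard treats that as success.
func cleanup() error {
	err := apierrors.NewNotFound(
		schema.GroupResource{Group: "config.projectsveltos.io", Resource: "clusterconfigurations"}, "demo")
	if err != nil && !apierrors.IsNotFound(err) {
		return err
	}
	return nil
}

func main() {
	fmt.Println(cleanup()) // <nil>: an already-gone object no longer fails the cleanup loop
}
```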

controllers/utils.go

Lines changed: 1 addition & 1 deletion
@@ -236,7 +236,7 @@ func isNamespaced(ctx context.Context, r *unstructured.Unstructured, clusterName
 	// 2. RETRY LOOP: Give it 3 attempts with increasing wait times
 	// Total wait time: 1s + 2s + 3s = 6 seconds.
 	for i := range 3 {
-		// Log that we are attempting a refresh (MGIANLUC style)
+		// Log that we are attempting a refresh
 		logger.V(logs.LogInfo).Info(fmt.Sprintf("GVK %s not found, refreshing discovery (attempt %d)", gvk.String(), i+1))

 		// IMPORTANT: Invalidate the Discovery Client FIRST, then Reset the Mapper

test/fv/second_tier_test.go

Lines changed: 71 additions & 5 deletions
@@ -135,7 +135,7 @@ var _ = Describe("PolicyRef Tier", func() {
 				currentServiceAccount)
 		}, timeout, pollingInterval).Should(BeNil())

-		Byf("Verifying ServicdAccount has proper labels")
+		Byf("Verifying ServiceAccount has proper labels")
 		currentServiceAccount := &corev1.ServiceAccount{}
 		Expect(workloadClient.Get(context.TODO(),
 			types.NamespacedName{Namespace: saNamespace, Name: saName},
@@ -144,6 +144,8 @@ var _ = Describe("PolicyRef Tier", func() {
 		v, ok := currentServiceAccount.Labels[firstConfigMapLabelKey]
 		Expect(ok).To(BeTrue())
 		Expect(v).To(Equal(firstConfigMapLabelValue))
+		v, ok = currentServiceAccount.Labels[secondConfigMapLabelKey]
+		Expect(ok).To(BeFalse())

 		Byf("Verifying ClusterSummary %s status reports conflict for Resources feature", clusterSummary.Name)
 		Eventually(func() bool {
@@ -165,7 +167,7 @@ var _ = Describe("PolicyRef Tier", func() {
 			return false
 		}, timeout, pollingInterval).Should(BeTrue())

-		By("Updating second ConfigMap tier")
+		By(fmt.Sprintf("Updating ConfigMap %s/%s tier", secondConfigMap.Namespace, secondConfigMap.Name))
 		const lowerTier = 90
 		err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
 			Expect(k8sClient.Get(context.TODO(),
@@ -177,18 +179,22 @@ var _ = Describe("PolicyRef Tier", func() {
 					Name:      firstConfigMap.Name,
 				},
 				{
-					Tier:      lowerTier,
 					Kind:      string(libsveltosv1beta1.ConfigMapReferencedResourceKind),
 					Namespace: secondConfigMap.Namespace,
 					Name:      secondConfigMap.Name,
+					Tier:      lowerTier,
 				},
 			}
 			return k8sClient.Update(context.TODO(), currentClusterProfile)
 		})
 		Expect(err).To(BeNil())

-		Byf("Verifying ClusterSummary %s status is set to Deployed for Resources feature", clusterSummary.Name)
-		verifyFeatureStatusIsProvisioned(kindWorkloadCluster.GetNamespace(), clusterSummary.Name, libsveltosv1beta1.FeatureResources)
+		Expect(k8sClient.Get(context.TODO(),
+			types.NamespacedName{Name: clusterProfile.Name}, currentClusterProfile)).To(Succeed())
+
+		clusterSummary = verifyClusterSummary(clusterops.ClusterProfileLabelName,
+			currentClusterProfile.Name, &currentClusterProfile.Spec,
+			kindWorkloadCluster.GetNamespace(), kindWorkloadCluster.GetName(), getClusterType())

 		Byf("Verifying proper ServiceAccount is still present in the workload cluster with correct labels")
 		Eventually(func() bool {
@@ -203,12 +209,63 @@ var _ = Describe("PolicyRef Tier", func() {
 			if currentServiceAccount.Labels == nil {
 				return false
 			}
+			_, ok = currentServiceAccount.Labels[firstConfigMapLabelKey]
+			if ok {
+				return false
+			}
 			v, ok = currentServiceAccount.Labels[secondConfigMapLabelKey]
 			return ok && v == secondConfigMapLabelValue
 		}, timeout, pollingInterval).Should(BeTrue())

+		By("Changing first ConfigMap so there is no conflict anymore")
+		newSaNamespace := randomString()
+		firstConfigMap = createConfigMapWithPolicy(configMapNs, namePrefix+randomString(),
+			fmt.Sprintf(resource, newSaNamespace, saName, firstConfigMapLabelKey, firstConfigMapLabelValue))
+		Expect(k8sClient.Create(context.TODO(), firstConfigMap)).To(Succeed())
+
+		err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
+			Expect(k8sClient.Get(context.TODO(),
+				types.NamespacedName{Name: clusterProfile.Name}, currentClusterProfile)).To(Succeed())
+			currentClusterProfile.Spec.PolicyRefs = []configv1beta1.PolicyRef{
+				{
+					Kind:      string(libsveltosv1beta1.ConfigMapReferencedResourceKind),
+					Namespace: firstConfigMap.Namespace,
+					Name:      firstConfigMap.Name,
+				},
+				{
+					Kind:      string(libsveltosv1beta1.ConfigMapReferencedResourceKind),
+					Namespace: secondConfigMap.Namespace,
+					Name:      secondConfigMap.Name,
+					Tier:      lowerTier,
+				},
+			}
+			return k8sClient.Update(context.TODO(), currentClusterProfile)
+		})
+		Expect(err).To(BeNil())
+
+		Byf("Verifying new ServiceAccount is present in the workload cluster with correct labels")
+		Eventually(func() bool {
+			currentServiceAccount := &corev1.ServiceAccount{}
+			err = workloadClient.Get(context.TODO(),
+				types.NamespacedName{Namespace: newSaNamespace, Name: saName},
+				currentServiceAccount)
+			if err != nil {
+				return false
+			}
+
+			if currentServiceAccount.Labels == nil {
+				return false
+			}
+			v, ok = currentServiceAccount.Labels[firstConfigMapLabelKey]
+			return ok && v == firstConfigMapLabelValue
+		}, timeout, pollingInterval).Should(BeTrue())
+
+		Byf("Verifying ClusterSummary %s status is set to Deployed for Resources feature", clusterSummary.Name)
+		verifyFeatureStatusIsProvisioned(clusterSummary.Namespace, clusterSummary.Name, libsveltosv1beta1.FeatureResources)
+
 		policies := []policy{
 			{kind: "ServiceAccount", name: saName, namespace: saNamespace, group: ""},
+			{kind: "ServiceAccount", name: saName, namespace: newSaNamespace, group: ""},
 		}
 		verifyClusterConfiguration(configv1beta1.ClusterProfileKind, clusterProfile.Name,
 			clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, libsveltosv1beta1.FeatureResources,
@@ -228,5 +285,14 @@ var _ = Describe("PolicyRef Tier", func() {
 				currentServiceAccount)
 			return err != nil && apierrors.IsNotFound(err)
 		}, timeout, pollingInterval).Should(BeTrue())
+
+		Byf("Verifying second ServiceAccount is removed from the workload cluster")
+		Eventually(func() bool {
+			currentServiceAccount := &corev1.ServiceAccount{}
+			err = workloadClient.Get(context.TODO(),
+				types.NamespacedName{Namespace: newSaNamespace, Name: saName},
+				currentServiceAccount)
+			return err != nil && apierrors.IsNotFound(err)
+		}, timeout, pollingInterval).Should(BeTrue())
 	})
 })
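The test drives the ClusterProfile spec updates through `retry.RetryOnConflict`, which re-runs the closure whenever the update hits a resourceVersion conflict. A standalone sketch with a simulated conflict (group/resource/name are illustrative):

```go
package main

import (
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/util/retry"
)

func main() {
	attempts := 0
	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
		attempts++
		if attempts < 3 {
			// Pretend the ClusterProfile changed between our read and our update.
			return apierrors.NewConflict(
				schema.GroupResource{Group: "config.projectsveltos.io", Resource: "clusterprofiles"},
				"demo", fmt.Errorf("the object has been modified"))
		}
		return nil // third attempt: update accepted
	})
	fmt.Println(attempts, err) // 3 <nil>
}
```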
