Skip to content

Commit ecf6f25

Browse files
committed
fix(conformance): raise OOB create/delete timeout to 30m, make it overridable
The conformance framework's retryOnRecoverable helper was hard-coding a 10-minute deadline on the single-attempt wait for OOB plugin operations. Resources that legitimately take longer to reach a terminal state — most notably AWS::EKS::Cluster, which typically needs 10–15 min to become ACTIVE — fail the Discovery test's CreateOOB step purely because the deadline fires before AWS finishes provisioning. The outer retry loop then burns through its budget on subsequent attempts until the matrix job's 2h cap cancels the run. Raise the default to 30 min, which covers the cloud resources we've actually tested. Plugin authors with even slower resources can override via FORMAE_CONFORMANCE_OOB_TIMEOUT (any Go duration string, e.g. "45m" or "1h").
1 parent 0d353a4 commit ecf6f25

1 file changed

Lines changed: 19 additions & 2 deletions

File tree

pkg/plugin-conformance-tests/harness.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1614,6 +1614,23 @@ type pluginOperationResult struct {
16141614
err string // non-empty if the coordinator returned an error
16151615
}
16161616

1617+
// defaultOOBOperationTimeout is the wait budget used for a single OOB
// create/delete attempt when FORMAE_CONFORMANCE_OOB_TIMEOUT is unset or holds
// an unusable value. Slow cloud resources (e.g. AWS::EKS::Cluster, which
// legitimately takes 10–15 min to reach ACTIVE) need a generous budget, so the
// default is 30 min.
const defaultOOBOperationTimeout = 30 * time.Minute

// oobOperationTimeout returns how long retryOnRecoverable will wait on a single
// OOB create/delete attempt.
//
// Plugin authors with resources slower than the default allows can override it
// via FORMAE_CONFORMANCE_OOB_TIMEOUT (any Go duration string, e.g. "45m" or
// "1h"). The environment is read on every call. A value that does not parse as
// a duration, or that is zero or negative, is rejected: a warning is written to
// stderr and defaultOOBOperationTimeout is returned, so a typo such as "45"
// (missing unit) does not silently leave the default in place.
func oobOperationTimeout() time.Duration {
	const envVar = "FORMAE_CONFORMANCE_OOB_TIMEOUT"

	raw := os.Getenv(envVar)
	if raw == "" {
		// Unset and empty are treated alike: no override requested.
		return defaultOOBOperationTimeout
	}

	d, err := time.ParseDuration(raw)
	switch {
	case err != nil:
		fmt.Fprintf(os.Stderr, "conformance: ignoring %s=%q: %v; using default %s\n",
			envVar, raw, err, defaultOOBOperationTimeout)
	case d <= 0:
		// A non-positive deadline would expire immediately, so it is never
		// accepted as an override.
		fmt.Fprintf(os.Stderr, "conformance: ignoring %s=%q: timeout must be positive; using default %s\n",
			envVar, raw, defaultOOBOperationTimeout)
	default:
		return d
	}
	return defaultOOBOperationTimeout
}
1633+
16171634
// retryOnRecoverable executes a plugin operation (create/delete) with retries on recoverable errors.
16181635
// The opFn performs the actor call and returns the initial result. The caller's label is used for logging.
16191636
func (h *TestHarness) retryOnRecoverable(label string, opFn func() (*pluginOperationResult, error)) (resource.ProgressResult, error) {
@@ -1631,8 +1648,8 @@ func (h *TestHarness) retryOnRecoverable(label string, opFn func() (*pluginOpera
16311648

16321649
progress := res.initialProgress
16331650
if progress.OperationStatus == resource.OperationStatusInProgress {
1634-
h.t.Logf("%s in progress, waiting for completion...", label)
1635-
progress, err = h.waitForOperationProgress(res.operatorPID, progress, 10*time.Minute)
1651+
h.t.Logf("%s in progress, waiting for completion (timeout %s)...", label, oobOperationTimeout())
1652+
progress, err = h.waitForOperationProgress(res.operatorPID, progress, oobOperationTimeout())
16361653
if err != nil {
16371654
return resource.ProgressResult{}, fmt.Errorf("waiting for %s to complete: %w", label, err)
16381655
}

0 commit comments

Comments
 (0)