Skip to content

Commit bfddfd9

Browse files
Merge pull request #2763 from xueqzhan/jobtier-os
TRT-2518: Add variant OS and JobTier to promotion check
2 parents 2d8189e + 4d95f8c commit bfddfd9

4 files changed

Lines changed: 443 additions & 58 deletions

File tree

tools/codegen/cmd/featuregate-test-analyzer.go

Lines changed: 138 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -188,22 +188,59 @@ func (o *FeatureGateTestAnalyzerOptions) Run(ctx context.Context) error {
188188

189189
writeTestingMarkDown(testingResults, md)
190190

191-
currErrs := checkIfTestingIsSufficient(enabledFeatureGate, testingResults)
192-
if len(currErrs) == 0 {
191+
validationResults := checkIfTestingIsSufficient(enabledFeatureGate, testingResults)
192+
193+
// Separate warnings from blocking errors
194+
blockingErrors := []error{}
195+
warnings := []error{}
196+
for _, vr := range validationResults {
197+
if vr.IsWarning {
198+
warnings = append(warnings, vr.Error)
199+
} else {
200+
blockingErrors = append(blockingErrors, vr.Error)
201+
}
202+
}
203+
204+
if len(validationResults) == 0 {
193205
md.Textf("Sufficient CI testing for %q.\n", enabledFeatureGate)
194206
fmt.Fprintf(o.Out, "Sufficient CI testing for %q.\n", enabledFeatureGate)
195207
} else {
196-
md.Textf("INSUFFICIENT CI testing for %q.\n", enabledFeatureGate)
208+
if len(blockingErrors) > 0 {
209+
md.Textf("INSUFFICIENT CI testing for %q.\n", enabledFeatureGate)
210+
fmt.Fprintf(o.Out, "INSUFFICIENT CI testing for %q.\n", enabledFeatureGate)
211+
} else {
212+
md.Textf("CI testing issues found for %q (non-blocking warnings).\n", enabledFeatureGate)
213+
fmt.Fprintf(o.Out, "CI testing issues found for %q (non-blocking warnings).\n", enabledFeatureGate)
214+
}
215+
197216
md.Textf("* At least five tests are expected for a feature\n")
198217
md.Textf("* Tests must be be run on every TechPreview platform (ask for an exception if your feature doesn't support a variant)")
199218
md.Textf("* All tests must run at least 14 times on every platform")
200219
md.Textf("* All tests must pass at least 95%% of the time")
220+
md.Textf("* JobTier must be one of: standard, informing, blocking\n")
201221
md.Text("")
222+
223+
if len(warnings) > 0 {
224+
md.Textf("**Non-blocking warnings (optional variants):**\n")
225+
for _, warn := range warnings {
226+
md.Textf(" - %s\n", warn.Error())
227+
}
228+
md.Text("")
229+
}
230+
231+
if len(blockingErrors) > 0 {
232+
md.Textf("**Blocking errors:**\n")
233+
for _, err := range blockingErrors {
234+
md.Textf(" - %s\n", err.Error())
235+
}
236+
md.Text("")
237+
}
202238
md.Text("")
203-
fmt.Fprintf(o.Out, "INSUFFICIENT CI testing for %q.\n", enabledFeatureGate)
204239
}
205-
errs = append(errs, currErrs...)
206-
featureGateHTMLData = append(featureGateHTMLData, buildHTMLFeatureGateData(enabledFeatureGate, testingResults, currErrs, release))
240+
241+
// Only add blocking errors to the error list (warnings don't fail the job)
242+
errs = append(errs, blockingErrors...)
243+
featureGateHTMLData = append(featureGateHTMLData, buildHTMLFeatureGateData(enabledFeatureGate, testingResults, blockingErrors, release))
207244

208245
}
209246

@@ -223,7 +260,7 @@ func (o *FeatureGateTestAnalyzerOptions) Run(ctx context.Context) error {
223260
return errors.Join(errs...)
224261
}
225262

226-
func buildHTMLFeatureGateData(name string, testingResults map[JobVariant]*TestingResults, errs []error, release string) utils.HTMLFeatureGate {
263+
func buildHTMLFeatureGateData(name string, testingResults map[JobVariant]*TestingResults, blockingErrors []error, release string) utils.HTMLFeatureGate {
227264
jobVariantsSet := sets.KeySet(testingResults)
228265
jobVariants := jobVariantsSet.UnsortedList()
229266

@@ -244,6 +281,9 @@ func buildHTMLFeatureGateData(name string, testingResults map[JobVariant]*Testin
244281
Cloud: jv.Cloud,
245282
Architecture: jv.Architecture,
246283
NetworkStack: jv.NetworkStack,
284+
OS: jv.OS,
285+
JobTiers: jv.JobTiers,
286+
Optional: jv.Optional,
247287
ColIndex: i + 1,
248288
})
249289
}
@@ -272,6 +312,7 @@ func buildHTMLFeatureGateData(name string, testingResults map[JobVariant]*Testin
272312
jobVariant.Cloud,
273313
jobVariant.Architecture,
274314
jobVariant.NetworkStack,
315+
jobVariant.OS,
275316
),
276317
}
277318
if testResults == nil {
@@ -296,7 +337,7 @@ func buildHTMLFeatureGateData(name string, testingResults map[JobVariant]*Testin
296337

297338
return utils.HTMLFeatureGate{
298339
Name: name,
299-
Sufficient: len(errs) == 0,
340+
Sufficient: len(blockingErrors) == 0,
300341
Variants: variants,
301342
Tests: tests,
302343
}
@@ -326,15 +367,29 @@ func writeHTMLFromTemplate(filename string, featureGateHTMLData []utils.HTMLFeat
326367
return nil
327368
}
328369

329-
func checkIfTestingIsSufficient(featureGate string, testingResults map[JobVariant]*TestingResults) []error {
330-
errs := []error{}
370+
func checkIfTestingIsSufficient(featureGate string, testingResults map[JobVariant]*TestingResults) []ValidationResult {
371+
results := []ValidationResult{}
372+
331373
for jobVariant, testedVariant := range testingResults {
374+
// Use the Optional field to determine if validation failures are warnings or errors
375+
// Optional variants (like RHEL 10 in 4.22) have non-blocking warnings
376+
isOptional := jobVariant.Optional
377+
332378
if len(testedVariant.TestResults) < requiredNumberOfTests {
333-
errs = append(errs, fmt.Errorf("error: only %d tests found, need at least %d for %q on %v", len(testedVariant.TestResults), requiredNumberOfTests, featureGate, jobVariant))
379+
results = append(results, ValidationResult{
380+
Error: fmt.Errorf("error: only %d tests found, need at least %d for %q on %v",
381+
len(testedVariant.TestResults), requiredNumberOfTests, featureGate, jobVariant),
382+
IsWarning: isOptional,
383+
})
334384
}
385+
335386
for _, testResults := range testedVariant.TestResults {
336387
if testResults.TotalRuns < requiredNumberOfTestRunsPerVariant {
337-
errs = append(errs, fmt.Errorf("error: %q only has %d runs, need at least %d runs for %q on %v", testResults.TestName, testResults.TotalRuns, requiredNumberOfTestRunsPerVariant, featureGate, jobVariant))
388+
results = append(results, ValidationResult{
389+
Error: fmt.Errorf("error: %q only has %d runs, need at least %d runs for %q on %v",
390+
testResults.TestName, testResults.TotalRuns, requiredNumberOfTestRunsPerVariant, featureGate, jobVariant),
391+
IsWarning: isOptional,
392+
})
338393
}
339394
if testResults.TotalRuns == 0 {
340395
continue
@@ -343,12 +398,16 @@ func checkIfTestingIsSufficient(featureGate string, testingResults map[JobVarian
343398
if passPercent < requiredPassRateOfTestsPerVariant {
344399
displayExpected := int(requiredPassRateOfTestsPerVariant * 100)
345400
displayActual := int(passPercent * 100)
346-
errs = append(errs, fmt.Errorf("error: %q only passed %d%%, need at least %d%% for %q on %v", testResults.TestName, displayActual, displayExpected, featureGate, jobVariant))
401+
results = append(results, ValidationResult{
402+
Error: fmt.Errorf("error: %q only passed %d%%, need at least %d%% for %q on %v",
403+
testResults.TestName, displayActual, displayExpected, featureGate, jobVariant),
404+
IsWarning: isOptional,
405+
})
347406
}
348407
}
349408
}
350409

351-
return errs
410+
return results
352411
}
353412

354413
func writeTestingMarkDown(testingResults map[JobVariant]*TestingResults, md *utils.Markdown) {
@@ -364,6 +423,12 @@ func writeTestingMarkDown(testingResults map[JobVariant]*TestingResults, md *uti
364423
if jobVariant.NetworkStack != "" {
365424
columnHeader = columnHeader + fmt.Sprintf("<br/> %v ", jobVariant.NetworkStack)
366425
}
426+
if jobVariant.OS != "" {
427+
columnHeader = columnHeader + fmt.Sprintf("<br/> OS:%v ", jobVariant.OS)
428+
}
429+
if jobVariant.JobTiers != "" {
430+
columnHeader = columnHeader + fmt.Sprintf("<br/> Tiers:%v ", jobVariant.JobTiers)
431+
}
367432
md.Exact(columnHeader)
368433
}
369434
md.EndTableRow()
@@ -458,6 +523,13 @@ var (
458523
Architecture: "amd64",
459524
Topology: "single",
460525
},
526+
{
527+
Cloud: "aws",
528+
Architecture: "amd64",
529+
Topology: "ha",
530+
OS: "rhel10",
531+
Optional: true, // RHEL 10 is optional in 4.22, will be required in OCP 5
532+
},
461533

462534
// TODO restore these once we run TechPreview jobs that contain them
463535
//{
@@ -533,6 +605,9 @@ type JobVariant struct {
533605
Architecture string
534606
Topology string
535607
NetworkStack string
608+
OS string
609+
JobTiers string // Comma-separated tiers (e.g., "standard,informing,blocking"). If empty, defaults to "standard,informing,blocking"
610+
Optional bool // If true, validation failures for this variant are non-blocking warnings
536611
}
537612

538613
type OrderedJobVariants []JobVariant
@@ -575,6 +650,12 @@ type TestResults struct {
575650
FlakedRuns int
576651
}
577652

653+
// ValidationResult represents a validation error or warning
654+
type ValidationResult struct {
655+
Error error
656+
IsWarning bool // if true, this is a non-blocking warning (for optional variants)
657+
}
658+
578659
func testResultByName(results []TestResults, testName string) *TestResults {
579660
for _, curr := range results {
580661
if curr.TestName == testName {
@@ -584,6 +665,36 @@ func testResultByName(results []TestResults, testName string) *TestResults {
584665
return nil
585666
}
586667

668+
func validateJobTiers(jobVariant JobVariant) error {
669+
if jobVariant.JobTiers == "" {
670+
return nil // Empty is valid - will default to standard,informing,blocking
671+
}
672+
673+
validTiers := map[string]bool{
674+
"standard": true,
675+
"informing": true,
676+
"blocking": true,
677+
}
678+
679+
hasValidTier := false
680+
for _, tier := range strings.Split(jobVariant.JobTiers, ",") {
681+
tier = strings.TrimSpace(tier)
682+
if tier != "" {
683+
hasValidTier = true
684+
if !validTiers[tier] {
685+
return fmt.Errorf("invalid JobTier %q in variant %+v - must be one of: standard, informing, blocking", tier, jobVariant)
686+
}
687+
}
688+
}
689+
690+
// Reject malformed strings like "," or " , " that contain no valid tiers
691+
if !hasValidTier {
692+
return fmt.Errorf("JobTiers string %q contains no valid tier names in variant %+v", jobVariant.JobTiers, jobVariant)
693+
}
694+
695+
return nil
696+
}
697+
587698
func listTestResultFor(featureGate string, clusterProfiles sets.Set[string]) (map[JobVariant]*TestingResults, error) {
588699
fmt.Printf("Query sippy for all test run results for feature gate %q on clusterProfile %q\n", featureGate, sets.List(clusterProfiles))
589700

@@ -605,6 +716,13 @@ func listTestResultFor(featureGate string, clusterProfiles sets.Set[string]) (ma
605716
jobVariantsToCheck = append(jobVariantsToCheck, selfManagedPlatformVariants...)
606717
}
607718

719+
// Validate all variants before making expensive API calls
720+
for _, jobVariant := range jobVariantsToCheck {
721+
if err := validateJobTiers(jobVariant); err != nil {
722+
return nil, err
723+
}
724+
}
725+
608726
for _, jobVariant := range jobVariantsToCheck {
609727
jobVariantResults, err := listTestResultForVariant(featureGate, jobVariant)
610728
if err != nil {
@@ -720,7 +838,7 @@ func listTestResultForVariant(featureGate string, jobVariant JobVariant) (*Testi
720838
}
721839

722840
testNameToResults := map[string]*TestResults{}
723-
queries := sippy.QueriesFor(jobVariant.Cloud, jobVariant.Architecture, jobVariant.Topology, jobVariant.NetworkStack, testPattern)
841+
queries := sippy.QueriesFor(jobVariant.Cloud, jobVariant.Architecture, jobVariant.Topology, jobVariant.NetworkStack, jobVariant.OS, jobVariant.JobTiers, testPattern)
724842
release, err := getRelease()
725843
if err != nil {
726844
return nil, fmt.Errorf("couldn't fetch latest release version: %w", err)
@@ -776,11 +894,12 @@ func listTestResultForVariant(featureGate string, jobVariant JobVariant) (*Testi
776894

777895
// Try to find enough test results in the last week, but if we have to we can extend
778896
// the window to two weeks.
897+
// NOTE: Use += to accumulate results across multiple JobTier queries
779898
if currTest.CurrentRuns >= requiredNumberOfTestRunsPerVariant {
780-
testResults.TotalRuns = currTest.CurrentRuns
781-
testResults.SuccessfulRuns = currTest.CurrentSuccesses
782-
testResults.FailedRuns = currTest.CurrentFailures
783-
testResults.FlakedRuns = currTest.CurrentFlakes
899+
testResults.TotalRuns += currTest.CurrentRuns
900+
testResults.SuccessfulRuns += currTest.CurrentSuccesses
901+
testResults.FailedRuns += currTest.CurrentFailures
902+
testResults.FlakedRuns += currTest.CurrentFlakes
784903
} else {
785904
fmt.Printf("Insufficient results in last 7 days, increasing lookback to 2 weeks...")
786905
testResults.TotalRuns += currTest.CurrentRuns + currTest.PreviousRuns

0 commit comments

Comments
 (0)