-
Notifications
You must be signed in to change notification settings - Fork 2
fix: isolate exporter timeout phases #154
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ package exporter | |
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "time" | ||
|
|
||
|
|
@@ -173,26 +174,35 @@ func (e *healthExporter) ExportNow(ctx context.Context) error { | |
| // export performs the actual data export operation | ||
| func (e *healthExporter) export() error { | ||
| log.Logger.Infow("Starting health export") | ||
| ctx, cancel := context.WithTimeout(e.ctx, e.options.timeout) | ||
| defer cancel() | ||
| collectCtx, cancelCollect := context.WithTimeout(e.ctx, e.options.timeout) | ||
| defer cancelCollect() | ||
|
Comment on lines
+180
to
+181
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
# Verify that ExportNow accepts a caller context while export() roots phase contexts in e.ctx.
rg -n -C2 --type go \
-e 'func \(e \*healthExporter\) ExportNow' \
-e 'func \(e \*healthExporter\) export\(' \
-e 'context\.WithTimeout\(e\.ctx'Repository: NVIDIA/fleet-intelligence-agent Length of output: 1364 🏁 Script executed: #!/bin/bash
# Find all calls to export() within the file
rg -n 'e\.export\(\)' internal/exporter/exporter.go
# Check the Start() method to understand context lifecycle
rg -n -A 20 'func \(e \*healthExporter\) Start' internal/exporter/exporter.go
# Check Stop/Close methods if they exist
rg -n -B 2 -A 10 'func \(e \*healthExporter\) Stop\|func \(e \*healthExporter\) Close' internal/exporter/exporter.goRepository: NVIDIA/fleet-intelligence-agent Length of output: 739 🏁 Script executed: #!/bin/bash
# Show more of the Start() method, particularly around line 147
sed -n '123,160p' internal/exporter/exporter.go
# Search for where e.ctx is created (likely in constructor or New function)
rg -n 'e\.ctx.*=' internal/exporter/exporter.go | head -20
# Show the healthExporter struct definition
rg -n -B 5 -A 15 'type healthExporter struct' internal/exporter/exporter.goRepository: NVIDIA/fleet-intelligence-agent Length of output: 1737 🏁 Script executed: #!/bin/bash
# Show the complete New() function
sed -n '61,100p' internal/exporter/exporter.go
# Show the complete Stop() method
rg -n -A 20 'func \(e \*healthExporter\) Stop\(\)' internal/exporter/exporter.go
Repository: NVIDIA/fleet-intelligence-agent Length of output: 2028
Pass the caller context to the export method.
Additionally, when Also applies to: 193-198, 205-206 🤖 Prompt for AI Agents |
||
|
|
||
| // Refresh configuration from metadata on every export | ||
| // If the endpoints/auth token are not empty, export will continue | ||
| // If the endpoints/auth token are empty, exportHTTP will skip | ||
| e.refreshConfigFromMetadata(ctx) | ||
| e.refreshConfigFromMetadata(collectCtx) | ||
|
|
||
| // Collect health data | ||
| healthData, err := e.collector.Collect(ctx) | ||
| if err != nil { | ||
| healthData, err := e.collector.Collect(collectCtx) | ||
| if err != nil && healthData == nil { | ||
| return fmt.Errorf("collection failed: %w", err) | ||
| } | ||
| if err != nil { | ||
| log.Logger.Warnw("Collection completed with partial data", | ||
| "error", err, | ||
| "timed_out", errors.Is(err, context.DeadlineExceeded), | ||
| "canceled", errors.Is(err, context.Canceled)) | ||
| } | ||
|
|
||
| // Export data based on mode | ||
| if e.options.config.OfflineMode { | ||
| return e.exportToFile(healthData) | ||
| } else { | ||
| return e.exportToHTTP(ctx, healthData) | ||
| } | ||
|
|
||
| uploadCtx, cancelUpload := context.WithTimeout(e.ctx, e.options.timeout) | ||
| defer cancelUpload() | ||
|
|
||
| return e.exportToHTTP(uploadCtx, healthData) | ||
| } | ||
|
|
||
| // exportToFile writes health data to files | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Treat machine-info cancellation like the other partial-data phases.
This branch still logs `context.Canceled`/`context.DeadlineExceeded` as `Errorw` before returning the same partial-data error path. Metrics, events, and component data already suppress that noise; machine info should match.
🔧 Suggested adjustment
```diff
 if c.config.IncludeMachineInfo {
 	if err := c.collectMachineInfo(ctx, data); err != nil {
+		if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+			return data, err
+		}
 		log.Logger.Errorw("Failed to collect machine info", "error", err)
 	}
 }
```
📝 Committable suggestion
🤖 Prompt for AI Agents