|
| 1 | +package sender |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "errors" |
| 6 | + "fmt" |
| 7 | + "log" |
| 8 | + "time" |
| 9 | + |
| 10 | + "github.com/sei-protocol/sei-load/stats" |
| 11 | + "github.com/sei-protocol/sei-load/utils/service" |
| 12 | + "golang.org/x/time/rate" |
| 13 | +) |
| 14 | + |
| 15 | +// This manages the ramping process for the loadtest. |
| 16 | +// A ramping loadtest begins at the StartTps and spends LoadTime at each step, ending when we violate the chain SLO of |
| 17 | +// 1 block per second over a given ramp period (as measured in the back half of the ramp time). |
| 18 | +// If we successfully pass a given TPS, we pause for PauseTime and then start the next step. |
| 19 | +// If we fail to pass a given TPS, we stop the loadtest. |
| 20 | + |
var (
	// ErrRampTestFailedSLO is the sentinel error returned by Ramper.Run when
	// the SLO watcher reports a violation. Compare against it with errors.Is.
	ErrRampTestFailedSLO = errors.New("Ramp Test failed SLO")
)
| 22 | + |
| 23 | +func (r *Ramper) FormatRampStats() string { |
| 24 | + return fmt.Sprintf(` |
| 25 | +───────────────────────────────────────── |
| 26 | + RAMP STATISTICS |
| 27 | +───────────────────────────────────────── |
| 28 | + Ramp Curve Stats: |
| 29 | + %s |
| 30 | +───────────────────────────────────────── |
| 31 | + Window Block Stats: |
| 32 | + %s |
| 33 | +─────────────────────────────────────────`, |
| 34 | + r.rampCurve.GetCurveStats(), r.lastWindowStats.FormatBlockStats()) |
| 35 | +} |
| 36 | + |
| 37 | +type Ramper struct { |
| 38 | + sharedLimiter *rate.Limiter |
| 39 | + blockCollector stats.BlockStatsProvider |
| 40 | + currentTps float64 |
| 41 | + startTime time.Time |
| 42 | + rampCurve RampCurve |
| 43 | + lastWindowStats stats.BlockStats |
| 44 | +} |
| 45 | + |
// RampCurve describes how the target TPS evolves over the course of a ramp.
type RampCurve interface {
	// GetTPS returns the target TPS for the elapsed duration t since the ramp
	// started.
	GetTPS(t time.Duration) float64

	// GetCurveStats returns a human-readable summary of the curve's progress,
	// suitable for inclusion in the final ramp report.
	GetCurveStats() string
}
| 51 | + |
| 52 | +func NewRamper(rampCurve RampCurve, blockCollector stats.BlockStatsProvider, sharedLimiter *rate.Limiter) *Ramper { |
| 53 | + sharedLimiter.SetLimit(rate.Limit(1)) // reset limiter to 1 |
| 54 | + return &Ramper{ |
| 55 | + sharedLimiter: sharedLimiter, |
| 56 | + blockCollector: blockCollector, |
| 57 | + rampCurve: rampCurve, |
| 58 | + } |
| 59 | +} |
| 60 | + |
| 61 | +func (r *Ramper) UpdateTPS() { |
| 62 | + timeSinceStart := time.Since(r.startTime) |
| 63 | + r.currentTps = r.rampCurve.GetTPS(timeSinceStart) |
| 64 | + r.sharedLimiter.SetLimit(rate.Limit(r.currentTps)) |
| 65 | +} |
| 66 | + |
| 67 | +func (r *Ramper) LogFinalStats() { |
| 68 | + log.Printf("Final Ramp stats: \n%s", r.FormatRampStats()) |
| 69 | +} |
| 70 | + |
| 71 | +// WatchSLO will evaluate the chain SLO every 100ms using a 30 second window, and return a channel if the SLO is violated |
| 72 | +func (r *Ramper) WatchSLO(ctx context.Context) <-chan struct{} { |
| 73 | + ch := make(chan struct{}) |
| 74 | + go func() { |
| 75 | + defer close(ch) |
| 76 | + |
| 77 | + log.Println("🔍 Ramping watching chain SLO with 30s windows, checking every 100ms") |
| 78 | + |
| 79 | + // Two separate timers: frequent SLO checks and window resets |
| 80 | + sloCheckTicker := time.NewTicker(100 * time.Millisecond) |
| 81 | + windowResetTicker := time.NewTicker(30 * time.Second) |
| 82 | + defer sloCheckTicker.Stop() |
| 83 | + defer windowResetTicker.Stop() |
| 84 | + |
| 85 | + // Reset window stats at the start |
| 86 | + r.blockCollector.ResetWindowStats() |
| 87 | + |
| 88 | + for { |
| 89 | + select { |
| 90 | + case <-ctx.Done(): |
| 91 | + return |
| 92 | + case <-sloCheckTicker.C: |
| 93 | + // Check SLO every 100ms |
| 94 | + p90BlockTime := r.blockCollector.GetWindowBlockTimePercentile(90) |
| 95 | + if p90BlockTime > 1*time.Second { |
| 96 | + log.Printf("❌ SLO violated: 90th percentile block time %v exceeds 1s threshold", p90BlockTime) |
| 97 | + select { |
| 98 | + case ch <- struct{}{}: |
| 99 | + case <-ctx.Done(): |
| 100 | + } |
| 101 | + return |
| 102 | + } |
| 103 | + case <-windowResetTicker.C: |
| 104 | + // Reset window stats every 30 seconds for fresh measurements |
| 105 | + log.Printf("🔄 Resetting SLO window stats (30s period)") |
| 106 | + // save last window stats |
| 107 | + r.lastWindowStats = r.blockCollector.GetWindowBlockStats() |
| 108 | + r.blockCollector.ResetWindowStats() |
| 109 | + } |
| 110 | + } |
| 111 | + }() |
| 112 | + return ch |
| 113 | +} |
| 114 | + |
| 115 | +// Start initializes and starts all workers |
| 116 | +func (r *Ramper) Run(ctx context.Context) error { |
| 117 | + return service.Run(ctx, func(ctx context.Context, s service.Scope) error { |
| 118 | + // TODO: Implement ramping logic |
| 119 | + r.startTime = time.Now() |
| 120 | + sloChan := r.WatchSLO(ctx) |
| 121 | + tpsUpdateTicker := time.NewTicker(100 * time.Millisecond) |
| 122 | + for ctx.Err() == nil { |
| 123 | + |
| 124 | + select { |
| 125 | + case <-sloChan: |
| 126 | + r.sharedLimiter.SetLimit(rate.Limit(1)) |
| 127 | + log.Printf("❌ Ramping failed to pass SLO, stopping loadtest, failure window blockstats:") |
| 128 | + log.Println(r.blockCollector.GetWindowBlockStats().FormatBlockStats()) |
| 129 | + return ErrRampTestFailedSLO |
| 130 | + case <-tpsUpdateTicker.C: |
| 131 | + r.UpdateTPS() |
| 132 | + case <-ctx.Done(): |
| 133 | + return ctx.Err() |
| 134 | + } |
| 135 | + } |
| 136 | + return ctx.Err() |
| 137 | + }) |
| 138 | +} |
| 139 | + |
// RampCurveStep is a stepped ramp curve: each cycle consists of a load phase
// of LoadInterval at a fixed TPS followed by a recovery phase of
// RecoveryInterval at 1 TPS. The load TPS of cycle n is
// StartTps + n*IncrementTps.
type RampCurveStep struct {
	// StartTps is the target TPS of the first load phase.
	StartTps float64
	// IncrementTps is added to the target TPS for every subsequent cycle.
	IncrementTps float64
	// LoadInterval is the length of the load phase of each cycle.
	LoadInterval time.Duration
	// RecoveryInterval is the length of the recovery phase of each cycle.
	RecoveryInterval time.Duration
	// Step is the highest cycle number GetTPS has seen during a load phase.
	Step int
	// CurrentTPS is the load-phase target TPS belonging to Step.
	CurrentTPS float64
}

// NewRampCurveStep returns a RampCurveStep positioned at step 0 with
// CurrentTPS equal to startTps.
func NewRampCurveStep(startTps float64, incrementTps float64, loadInterval time.Duration, recoveryInterval time.Duration) *RampCurveStep {
	return &RampCurveStep{
		StartTps:         startTps,
		IncrementTps:     incrementTps,
		LoadInterval:     loadInterval,
		RecoveryInterval: recoveryInterval,
		Step:             0,
		CurrentTPS:       startTps,
	}
}

// GetStartTps returns the configured starting TPS.
func (r *RampCurveStep) GetStartTps() float64 {
	return r.StartTps
}

// GetIncrementTps returns the configured per-step TPS increment.
func (r *RampCurveStep) GetIncrementTps() float64 {
	return r.IncrementTps
}

// GetTPS returns the target TPS for elapsed time t and, when t falls in the
// load phase of a cycle later than Step, advances Step and CurrentTPS to that
// cycle (logging the new target).
//
// During a recovery phase it returns 1.00, which is close to zero without
// fully blocking the limiter. If LoadInterval+RecoveryInterval is not
// positive there is no cycle to step through, so CurrentTPS is returned
// unchanged.
func (r *RampCurveStep) GetTPS(t time.Duration) float64 {
	cycleInterval := r.LoadInterval + r.RecoveryInterval
	if cycleInterval <= 0 {
		// The modulo and division below would panic on a zero cycle length.
		return r.CurrentTPS
	}

	// Position inside the current load+recovery cycle.
	cycleProgress := t % cycleInterval

	// The load phase is the half-open range [0, LoadInterval); everything from
	// LoadInterval onwards belongs to recovery.
	if cycleProgress >= r.LoadInterval {
		return 1.00
	}

	cycleNumber := int(t / cycleInterval)

	// First call inside a later cycle's load phase: move to that step.
	if cycleNumber > r.Step {
		r.Step = cycleNumber
		newTps := r.StartTps + r.IncrementTps*float64(r.Step)
		log.Printf("📈 Ramping to %f TPS for %v", newTps, r.LoadInterval)
		r.CurrentTPS = newTps
		return newTps
	}

	return r.CurrentTPS
}

// GetCurveStats reports the target TPS of the step immediately before the
// current one, i.e. the highest target that was fully run before the current
// step began. While still on step 0 there is no such step and a placeholder
// message is returned.
func (r *RampCurveStep) GetCurveStats() string {
	step := r.Step - 1
	if step < 0 {
		return "no ramp curve stats available"
	}
	return fmt.Sprintf("Highest Passed TPS: %.2f", r.StartTps+r.IncrementTps*float64(step))
}
0 commit comments