4747 * length). In turn, the "intercepts" metric reflects the relative frequency of
4848 * situations in which the measured idle duration is so much shorter than the
4949 * sleep length that the bin it falls into corresponds to an idle state
50- * shallower than the one whose bin is fallen into by the sleep length.
50+ * shallower than the one whose bin is fallen into by the sleep length (these
51+ * situations are referred to as "intercepts" below).
52+ *
53+ * In addition to the metrics described above, the governor counts recent
54+ * intercepts (that is, intercepts that have occurred during the last NR_RECENT
55+ * invocations of it for the given CPU) for each bin.
5156 *
5257 * In order to select an idle state for a CPU, the governor takes the following
5358 * steps (modulo the possible latency constraint that must be taken into account
5459 * too):
5560 *
5661 * 1. Find the deepest CPU idle state whose target residency does not exceed
57- * the current sleep length (the candidate idle state) and compute two sums
58- * as follows:
62+ * the current sleep length (the candidate idle state) and compute 3 sums as
63+ * follows:
5964 *
6065 * - The sum of the "hits" and "intercepts" metrics for the candidate state
6166 * and all of the deeper idle states (it represents the cases in which the
6772 * idle long enough to avoid being intercepted if the sleep length had been
6873 * equal to the current one).
6974 *
70- * 2. If the second sum is greater than the first one, look for an alternative
71- * idle state to select.
75+ * - The sum of the numbers of recent intercepts for all of the idle states
76+ * shallower than the candidate one.
77+ *
78+ * 2. If the second sum is greater than the first one or the third sum is
79+ * greater than NR_RECENT / 2, the CPU is likely to wake up early, so look
80+ * for an alternative idle state to select.
7281 *
7382 * - Traverse the idle states shallower than the candidate one in the
7483 * descending order.
7584 *
76- * - For each of them compute the sum of the "intercepts" metrics over all of
77- * the idle states between it and the candidate one (including the former
78- * and excluding the latter).
85+ * - For each of them compute the sum of the "intercepts" metrics and the sum
86+ * of the numbers of recent intercepts over all of the idle states between
87+ * it and the candidate one (including the former and excluding the
88+ * latter).
7989 *
80- * - If that sum is greater than a half of the second sum computed in step 1
81- * (which means that the target residency of the state in question had not
82- * exceeded the idle duration in over a half of the relevant cases), select
83- * the given idle state instead of the candidate one.
90+ * - If each of these sums that needs to be taken into account (because the
91+ * check related to it has indicated that the CPU is likely to wake up
92+ * early) is greater than a half of the corresponding sum computed in step
93+ * 1 (which means that the target residency of the state in question had
94+ * not exceeded the idle duration in over a half of the relevant cases),
95+ * select the given idle state instead of the candidate one.
8496 *
85- * 3. If the majority of the most recent idle duration values are below the
86- * current anticipated idle duration, use those values to compute the new
87- * expected idle duration and find an idle state matching it (which has to
88- * be shallower than the current candidate one).
97+ * 3. By default, select the candidate state.
8998 */
9099
91100#include <linux/cpuidle.h>
103112
104113/*
105114 * Number of the most recent idle duration values to take into consideration for
106- * the detection of wakeup patterns.
115+ * the detection of recent early wakeup patterns.
107116 */
108- #define INTERVALS 8
117+ #define NR_RECENT 9
109118
110119/**
111120 * struct teo_bin - Metrics used by the TEO cpuidle governor.
112121 * @intercepts: The "intercepts" metric.
113122 * @hits: The "hits" metric.
123+ * @recent: The number of recent "intercepts".
114124 */
115125struct teo_bin {
116126 unsigned int intercepts ;
117127 unsigned int hits ;
128+ unsigned int recent ;
118129};
119130
120131/**
@@ -123,16 +134,16 @@ struct teo_bin {
123134 * @sleep_length_ns: Time till the closest timer event (at the selection time).
124135 * @state_bins: Idle state data bins for this CPU.
125136 * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
126- * @interval_idx : Index of the most recent saved idle interval .
127- * @intervals: Saved idle duration values .
137+ * @next_recent_idx : Index of the next @recent_idx entry to update .
138+ * @recent_idx: Indices of bins corresponding to recent "intercepts" .
128139 */
129140struct teo_cpu {
130141 s64 time_span_ns ;
131142 s64 sleep_length_ns ;
132143 struct teo_bin state_bins [CPUIDLE_STATE_MAX ];
133144 unsigned int total ;
134- int interval_idx ;
135- u64 intervals [ INTERVALS ];
145+ int next_recent_idx ;
146+ int recent_idx [ NR_RECENT ];
136147};
137148
138149static DEFINE_PER_CPU (struct teo_cpu , teo_cpus ) ;
@@ -201,26 +212,29 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
201212 }
202213 }
203214
215+ i = cpu_data -> next_recent_idx ++ ;
216+ if (cpu_data -> next_recent_idx >= NR_RECENT )
217+ cpu_data -> next_recent_idx = 0 ;
218+
219+ if (cpu_data -> recent_idx [i ] >= 0 )
220+ cpu_data -> state_bins [cpu_data -> recent_idx [i ]].recent -- ;
221+
204222 /*
205223 * If the measured idle duration falls into the same bin as the sleep
206224 * length, this is a "hit", so update the "hits" metric for that bin.
207225 * Otherwise, update the "intercepts" metric for the bin fallen into by
208226 * the measured idle duration.
209227 */
210- if (idx_timer == idx_duration )
228+ if (idx_timer == idx_duration ) {
211229 cpu_data -> state_bins [idx_timer ].hits += PULSE ;
212- else
230+ cpu_data -> recent_idx [i ] = -1 ;
231+ } else {
213232 cpu_data -> state_bins [idx_duration ].intercepts += PULSE ;
233+ cpu_data -> state_bins [idx_duration ].recent ++ ;
234+ cpu_data -> recent_idx [i ] = idx_duration ;
235+ }
214236
215237 cpu_data -> total += PULSE ;
216-
217- /*
218- * Save idle duration values corresponding to non-timer wakeups for
219- * pattern detection.
220- */
221- cpu_data -> intervals [cpu_data -> interval_idx ++ ] = measured_ns ;
222- if (cpu_data -> interval_idx >= INTERVALS )
223- cpu_data -> interval_idx = 0 ;
224238}
225239
226240static bool teo_time_ok (u64 interval_ns )
@@ -271,10 +285,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
271285 s64 latency_req = cpuidle_governor_latency_req (dev -> cpu );
272286 unsigned int idx_intercept_sum = 0 ;
273287 unsigned int intercept_sum = 0 ;
288+ unsigned int idx_recent_sum = 0 ;
289+ unsigned int recent_sum = 0 ;
274290 unsigned int idx_hit_sum = 0 ;
275291 unsigned int hit_sum = 0 ;
276292 int constraint_idx = 0 ;
277293 int idx0 = 0 , idx = -1 ;
294+ bool alt_intercepts , alt_recent ;
278295 ktime_t delta_tick ;
279296 s64 duration_ns ;
280297 int i ;
@@ -317,6 +334,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
317334 */
318335 intercept_sum += prev_bin -> intercepts ;
319336 hit_sum += prev_bin -> hits ;
337+ recent_sum += prev_bin -> recent ;
320338
321339 if (dev -> states_usage [i ].disable )
322340 continue ;
@@ -336,6 +354,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
336354
337355 idx_intercept_sum = intercept_sum ;
338356 idx_hit_sum = hit_sum ;
357+ idx_recent_sum = recent_sum ;
339358 }
340359
341360 /* Avoid unnecessary overhead. */
@@ -350,27 +369,36 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
350369 * If the sum of the intercepts metric for all of the idle states
351370 * shallower than the current candidate one (idx) is greater than the
352371 * sum of the intercepts and hits metrics for the candidate state and
353- * all of the deeper states, the CPU is likely to wake up early, so find
354- * an alternative idle state to select.
372+ * all of the deeper states, or the sum of the numbers of recent
373+ * intercepts over all of the states shallower than the candidate one
374+ * is greater than a half of the number of recent events taken into
375+ * account, the CPU is likely to wake up early, so find an alternative
376+ * idle state to select.
355377 */
356- if (2 * idx_intercept_sum > cpu_data -> total - idx_hit_sum ) {
378+ alt_intercepts = 2 * idx_intercept_sum > cpu_data -> total - idx_hit_sum ;
379+ alt_recent = idx_recent_sum > NR_RECENT / 2 ;
380+ if (alt_recent || alt_intercepts ) {
357381 s64 last_enabled_span_ns = duration_ns ;
358382 int last_enabled_idx = idx ;
359383
360384 /*
361385 * Look for the deepest idle state whose target residency had
362386 * not exceeded the idle duration in over a half of the relevant
363- * cases in the past.
387+ * cases (both with respect to intercepts overall and with
388+ * respect to the recent intercepts only) in the past.
364389 *
365390 * Take the possible latency constraint and duration limitation
366391 * present if the tick has been stopped already into account.
367392 */
368393 intercept_sum = 0 ;
394+ recent_sum = 0 ;
369395
370396 for (i = idx - 1 ; i >= idx0 ; i -- ) {
397+ struct teo_bin * bin = & cpu_data -> state_bins [i ];
371398 s64 span_ns ;
372399
373- intercept_sum += cpu_data -> state_bins [i ].intercepts ;
400+ intercept_sum += bin -> intercepts ;
401+ recent_sum += bin -> recent ;
374402
375403 if (dev -> states_usage [i ].disable )
376404 continue ;
@@ -386,7 +414,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
386414 break ;
387415 }
388416
389- if (2 * intercept_sum > idx_intercept_sum ) {
417+ if ((!alt_recent || 2 * recent_sum > idx_recent_sum ) &&
418+ (!alt_intercepts ||
419+ 2 * intercept_sum > idx_intercept_sum )) {
390420 idx = i ;
391421 duration_ns = span_ns ;
392422 break ;
@@ -404,49 +434,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
404434 if (idx > constraint_idx )
405435 idx = constraint_idx ;
406436
407- if (idx > idx0 ) {
408- unsigned int count = 0 ;
409- u64 sum = 0 ;
410-
411- /*
412- * The target residencies of at least two different enabled idle
413- * states are less than or equal to the current expected idle
414- * duration. Try to refine the selection using the most recent
415- * measured idle duration values.
416- *
417- * Count and sum the most recent idle duration values less than
418- * the current expected idle duration value.
419- */
420- for (i = 0 ; i < INTERVALS ; i ++ ) {
421- u64 val = cpu_data -> intervals [i ];
422-
423- if (val >= duration_ns )
424- continue ;
425-
426- count ++ ;
427- sum += val ;
428- }
429-
430- /*
431- * Give up unless the majority of the most recent idle duration
432- * values are in the interesting range.
433- */
434- if (count > INTERVALS / 2 ) {
435- u64 avg_ns = div64_u64 (sum , count );
436-
437- /*
438- * Avoid spending too much time in an idle state that
439- * would be too shallow.
440- */
441- if (teo_time_ok (avg_ns )) {
442- duration_ns = avg_ns ;
443- if (drv -> states [idx ].target_residency_ns > avg_ns )
444- idx = teo_find_shallower_state (drv , dev ,
445- idx , avg_ns );
446- }
447- }
448- }
449-
450437end :
451438 /*
452439 * Don't stop the tick if the selected state is a polling one or if the
@@ -507,8 +494,8 @@ static int teo_enable_device(struct cpuidle_driver *drv,
507494
508495 memset (cpu_data , 0 , sizeof (* cpu_data ));
509496
510- for (i = 0 ; i < INTERVALS ; i ++ )
511- cpu_data -> intervals [i ] = U64_MAX ;
497+ for (i = 0 ; i < NR_RECENT ; i ++ )
498+ cpu_data -> recent_idx [i ] = -1 ;
512499
513500 return 0 ;
514501}
0 commit comments