
Commit edc6d6c

Pavankumar Kondeti authored and committed
Revert "sched: Remove sched_ktime_clock()"
This reverts commit 24c1812 ("sched: Remove sched_ktime_clock()").

WALT accounting uses ktime_get() as its time source to keep windows aligned with the tick. The ktime_get() API must not be called while the timekeeping subsystem is suspended during system suspend. The code before the reverted patch had a wrapper around ktime_get() to avoid calling it while timekeeping was suspended. The reverted patch removed this wrapper on the assumption that there would be no scheduler activity while the timekeeping subsystem is suspended.

The timekeeping subsystem is resumed very early, even before non-boot CPUs are brought online. However, tasks can wake up from idle notifiers, which are called before the timekeeping subsystem is resumed. When this happens, the time read from ktime_get() is not consistent: it jumps relative to the values returned once timekeeping is resumed. The rq->window_start update then happens with an incorrect time, leaving this rq->window_start inconsistent with the other CPUs' rq->window_start and with wallclock time after the timekeeping subsystem is resumed. This results in WALT accounting bugs.

Change-Id: I9c3b2fb9ffbf1103d1bd78778882450560dac09f
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
(cherry picked from commit faa04442e7a31357724dbb8e49ba64372ef37862)
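In short, the revert restores a suspend guard around ktime_get(). The sketch below is condensed from the kernel/sched/walt.c hunk further down (see that hunk for the full version, including the syscore_ops registration):

static ktime_t ktime_last;		/* reading sampled at suspend */
static bool sched_ktime_suspended;

u64 sched_ktime_clock(void)
{
	/*
	 * While timekeeping is suspended, hand out the timestamp sampled
	 * at suspend instead of calling ktime_get(), which must not be
	 * used in that window.
	 */
	if (unlikely(sched_ktime_suspended))
		return ktime_to_ns(ktime_last);
	return ktime_get_ns();
}

static int sched_suspend(void)
{
	ktime_last = ktime_get();	/* sample before timekeeping stops */
	sched_ktime_suspended = true;
	return 0;
}

static void sched_resume(void)
{
	sched_ktime_suspended = false;	/* ktime_get() is valid again */
}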
1 parent 8c39ff6 commit edc6d6c

6 files changed

Lines changed: 60 additions & 20 deletions


kernel/sched/core.c

Lines changed: 5 additions & 5 deletions
@@ -2186,7 +2186,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	rq = cpu_rq(task_cpu(p));
 	raw_spin_lock(&rq->lock);
 	old_load = task_load(p);
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
 	raw_spin_unlock(&rq->lock);
@@ -2272,7 +2272,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
 	trace_sched_waking(p);

 	if (!task_on_rq_queued(p)) {
-		u64 wallclock = ktime_get_ns();
+		u64 wallclock = sched_ktime_clock();

 		update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 		update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -3269,7 +3269,7 @@ void scheduler_tick(void)
 	old_load = task_load(curr);
 	set_window_start(rq);

-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);

 	update_rq_clock(rq);
@@ -3641,7 +3641,7 @@ static void __sched notrace __schedule(bool preempt)
 	clear_preempt_need_resched();
 	rq->clock_skip_update = 0;

-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	if (likely(prev != next)) {
 		prev->last_cpu_deselected_ts = wallclock;
 		if (!prev->on_rq)
@@ -9656,7 +9656,7 @@ void sched_exit(struct task_struct *p)
 	rq = task_rq_lock(p, &rf);

 	/* rq->curr == p */
-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	dequeue_task(rq, p, 0);
 	/*

kernel/sched/cpufreq_schedutil.c

Lines changed: 2 additions & 2 deletions
@@ -544,7 +544,7 @@ static void sugov_work(struct kthread_work *work)
 	mutex_lock(&sg_policy->work_lock);
 	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
 	sugov_track_cycles(sg_policy, sg_policy->policy->cur,
-			   ktime_get_ns());
+			   sched_ktime_clock());
 	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
 	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
 				CPUFREQ_RELATION_L);
@@ -1052,7 +1052,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
 	mutex_lock(&sg_policy->work_lock);
 	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
 	sugov_track_cycles(sg_policy, sg_policy->policy->cur,
-			   ktime_get_ns());
+			   sched_ktime_clock());
 	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
 	cpufreq_policy_apply_limits(policy);
 	mutex_unlock(&sg_policy->work_lock);

kernel/sched/fair.c

Lines changed: 1 addition & 1 deletion
@@ -11967,7 +11967,7 @@ static void walt_check_for_rotation(struct rq *src_rq)
 	if (is_max_capacity_cpu(src_cpu))
 		return;

-	wc = ktime_get_ns();
+	wc = sched_ktime_clock();
 	for_each_possible_cpu(i) {
 		struct rq *rq = cpu_rq(i);

kernel/sched/sched.h

Lines changed: 6 additions & 1 deletion
@@ -2335,8 +2335,13 @@ static inline u64 irq_time_read(int cpu)
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */

 #ifdef CONFIG_SCHED_WALT
+u64 sched_ktime_clock(void);
 void note_task_waking(struct task_struct *p, u64 wallclock);
 #else /* CONFIG_SCHED_WALT */
+static inline u64 sched_ktime_clock(void)
+{
+	return 0;
+}
 static inline void note_task_waking(struct task_struct *p, u64 wallclock) { }
 #endif /* CONFIG_SCHED_WALT */

@@ -2372,7 +2377,7 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
 	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
 						  cpu_of(rq)));
 	if (data)
-		data->func(data, ktime_get_ns(), flags);
+		data->func(data, sched_ktime_clock(), flags);
 }

 static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)

kernel/sched/walt.c

Lines changed: 45 additions & 10 deletions
@@ -19,6 +19,7 @@
  * and Todd Kjos
  */

+#include <linux/syscore_ops.h>
 #include <linux/cpufreq.h>
 #include <linux/list_sort.h>
 #include <linux/jiffies.h>
@@ -41,6 +42,8 @@ const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP",

 #define EARLY_DETECTION_DURATION 9500000

+static ktime_t ktime_last;
+static bool sched_ktime_suspended;
 static struct cpu_cycle_counter_cb cpu_cycle_counter_cb;
 static bool use_cycle_counter;
 DEFINE_MUTEX(cluster_lock);
@@ -50,6 +53,37 @@ u64 walt_load_reported_window;
 static struct irq_work walt_cpufreq_irq_work;
 static struct irq_work walt_migration_irq_work;

+u64 sched_ktime_clock(void)
+{
+	if (unlikely(sched_ktime_suspended))
+		return ktime_to_ns(ktime_last);
+	return ktime_get_ns();
+}
+
+static void sched_resume(void)
+{
+	sched_ktime_suspended = false;
+}
+
+static int sched_suspend(void)
+{
+	ktime_last = ktime_get();
+	sched_ktime_suspended = true;
+	return 0;
+}
+
+static struct syscore_ops sched_syscore_ops = {
+	.resume = sched_resume,
+	.suspend = sched_suspend
+};
+
+static int __init sched_init_ops(void)
+{
+	register_syscore_ops(&sched_syscore_ops);
+	return 0;
+}
+late_initcall(sched_init_ops);
+
 static void acquire_rq_locks_irqsave(const cpumask_t *cpus,
 				     unsigned long *flags)
 {
@@ -371,7 +405,7 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
 	if (is_idle_task(curr)) {
 		/* We're here without rq->lock held, IRQ disabled */
 		raw_spin_lock(&rq->lock);
-		update_task_cpu_cycles(curr, cpu, ktime_get_ns());
+		update_task_cpu_cycles(curr, cpu, sched_ktime_clock());
 		raw_spin_unlock(&rq->lock);
 	}
 }
@@ -432,7 +466,7 @@ void sched_account_irqtime(int cpu, struct task_struct *curr,
 	cur_jiffies_ts = get_jiffies_64();

 	if (is_idle_task(curr))
-		update_task_ravg(curr, rq, IRQ_UPDATE, ktime_get_ns(),
+		update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
 				 delta);

 	nr_windows = cur_jiffies_ts - rq->irqload_ts;
@@ -765,7 +799,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 	if (sched_disable_window_stats)
 		goto done;

-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();

 	update_task_ravg(task_rq(p)->curr, task_rq(p),
 			 TASK_UPDATE,
@@ -2052,7 +2086,7 @@ void mark_task_starting(struct task_struct *p)
 		return;
 	}

-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();
 	p->ravg.mark_start = p->last_wake_ts = wallclock;
 	p->last_enqueued_ts = wallclock;
 	p->last_switch_out_ts = 0;
@@ -2454,7 +2488,7 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,

 		raw_spin_lock_irqsave(&rq->lock, flags);
 		update_task_ravg(rq->curr, rq, TASK_UPDATE,
-				 ktime_get_ns(), 0);
+				 sched_ktime_clock(), 0);
 		raw_spin_unlock_irqrestore(&rq->lock, flags);
 	}
 }
@@ -2604,7 +2638,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
 		return;
 	}

-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();

 	/*
 	 * wakeup of two or more related tasks could race with each other and
@@ -2631,7 +2665,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp)

 	grp->preferred_cluster = best_cluster(grp,
 					      combined_demand, group_boost);
-	grp->last_update = ktime_get_ns();
+	grp->last_update = sched_ktime_clock();
 	trace_sched_set_preferred_cluster(grp, combined_demand);
 }

@@ -2655,7 +2689,7 @@ int update_preferred_cluster(struct related_thread_group *grp,
 	 * has passed since we last updated preference
 	 */
 	if (abs(new_load - old_load) > sched_ravg_window / 4 ||
-	    ktime_get_ns() - grp->last_update > sched_ravg_window)
+	    sched_ktime_clock() - grp->last_update > sched_ravg_window)
 		return 1;

 	return 0;
@@ -3038,7 +3072,7 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp,
 	bool new_task;
 	int i;

-	wallclock = ktime_get_ns();
+	wallclock = sched_ktime_clock();

 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
 	update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0);
@@ -3151,8 +3185,9 @@ void walt_irq_work(struct irq_work *irq_work)
 		level++;
 	}

-	wc = ktime_get_ns();
+	wc = sched_ktime_clock();
 	walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws);
+
 	for_each_sched_cluster(cluster) {
 		u64 aggr_grp_load = 0;

kernel/sched/walt.h

Lines changed: 1 addition & 1 deletion
@@ -292,7 +292,7 @@ static inline int walt_start_cpu(int prev_cpu)

 static inline void walt_update_last_enqueue(struct task_struct *p)
 {
-	p->last_enqueued_ts = ktime_get_ns();
+	p->last_enqueued_ts = sched_ktime_clock();
 }
 extern void walt_rotate_work_init(void);
 extern void walt_rotation_checkpoint(int nr_big);
