Skip to content

Commit 5394ad3

Browse files
committed
Merge branch 'pm-cpuidle' into linux-next
* pm-cpuidle:
  Documentation: ABI: testing: document the new cpuidle sysfs file
  Documentation: admin-guide: pm: Document intel_idle C1 demotion
  intel_idle: Add C1 demotion on/off sysfs knob
  cpuidle: psci: Transition to the faux device interface
  cpuidle: menu: Optimize bucket assignment when next_timer_ns equals KTIME_MAX
  cpuidle: teo: Fix typos in two comments
2 parents 16691e9 + e80e134 commit 5394ad3

6 files changed

Lines changed: 141 additions & 21 deletions

File tree

Documentation/ABI/testing/sysfs-devices-system-cpu

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ What: /sys/devices/system/cpu/cpuidle/available_governors
111111
/sys/devices/system/cpu/cpuidle/current_driver
112112
/sys/devices/system/cpu/cpuidle/current_governor
113113
/sys/devices/system/cpu/cpuidle/current_governor_ro
114+
/sys/devices/system/cpu/cpuidle/intel_c1_demotion
114115
Date: September 2007
115116
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
116117
Description: Discover cpuidle policy and mechanism
@@ -132,7 +133,11 @@ Description: Discover cpuidle policy and mechanism
132133

133134
current_governor_ro: (RO) displays current idle policy.
134135

135-
See Documentation/admin-guide/pm/cpuidle.rst and
136+
intel_c1_demotion: (RW) enables/disables the C1 demotion
137+
feature on Intel CPUs.
138+
139+
See Documentation/admin-guide/pm/cpuidle.rst,
140+
Documentation/admin-guide/pm/intel_idle.rst, and
136141
Documentation/driver-api/pm/cpuidle.rst for more information.
137142

138143

Documentation/admin-guide/pm/intel_idle.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,27 @@ instruction at all.
3838
only way to pass early-configuration-time parameters to it is via the kernel
3939
command line.
4040

41+
Sysfs Interface
42+
===============
43+
44+
The ``intel_idle`` driver exposes the following ``sysfs`` attributes in
45+
``/sys/devices/system/cpu/cpuidle/``:
46+
47+
``intel_c1_demotion``
48+
Enable or disable C1 demotion for all CPUs in the system. This file is
49+
only exposed on platforms that support the C1 demotion feature and where
50+
it was tested. Value 0 means that C1 demotion is disabled, value 1 means
51+
that it is enabled. Write 0 or 1 to disable or enable C1 demotion for
52+
all CPUs.
53+
54+
The C1 demotion feature involves the platform firmware demoting deep
55+
C-state requests from the OS (e.g., C6 requests) to C1. The idea is that
56+
firmware monitors CPU wake-up rate, and if it is higher than a
57+
platform-specific threshold, the firmware demotes deep C-state requests
58+
to C1. For example, Linux requests C6, but firmware notices too many
59+
wake-ups per second, and it keeps the CPU in C1. When the CPU stays in
60+
C1 long enough, the platform promotes it back to C6. This may improve
61+
some workloads' performance, but it may also increase power consumption.
4162

4263
.. _intel-idle-enumeration-of-states:
4364

drivers/cpuidle/cpuidle-psci.c

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include <linux/kernel.h>
1717
#include <linux/module.h>
1818
#include <linux/of.h>
19-
#include <linux/platform_device.h>
19+
#include <linux/device/faux.h>
2020
#include <linux/psci.h>
2121
#include <linux/pm_domain.h>
2222
#include <linux/pm_runtime.h>
@@ -407,14 +407,14 @@ static int psci_idle_init_cpu(struct device *dev, int cpu)
407407
* to register cpuidle driver then rollback to cancel all CPUs
408408
* registration.
409409
*/
410-
static int psci_cpuidle_probe(struct platform_device *pdev)
410+
static int psci_cpuidle_probe(struct faux_device *fdev)
411411
{
412412
int cpu, ret;
413413
struct cpuidle_driver *drv;
414414
struct cpuidle_device *dev;
415415

416416
for_each_present_cpu(cpu) {
417-
ret = psci_idle_init_cpu(&pdev->dev, cpu);
417+
ret = psci_idle_init_cpu(&fdev->dev, cpu);
418418
if (ret)
419419
goto out_fail;
420420
}
@@ -434,26 +434,18 @@ static int psci_cpuidle_probe(struct platform_device *pdev)
434434
return ret;
435435
}
436436

437-
static struct platform_driver psci_cpuidle_driver = {
437+
static struct faux_device_ops psci_cpuidle_ops = {
438438
.probe = psci_cpuidle_probe,
439-
.driver = {
440-
.name = "psci-cpuidle",
441-
},
442439
};
443440

444441
static int __init psci_idle_init(void)
445442
{
446-
struct platform_device *pdev;
447-
int ret;
443+
struct faux_device *fdev;
448444

449-
ret = platform_driver_register(&psci_cpuidle_driver);
450-
if (ret)
451-
return ret;
452-
453-
pdev = platform_device_register_simple("psci-cpuidle", -1, NULL, 0);
454-
if (IS_ERR(pdev)) {
455-
platform_driver_unregister(&psci_cpuidle_driver);
456-
return PTR_ERR(pdev);
445+
fdev = faux_device_create("psci-cpuidle", NULL, &psci_cpuidle_ops);
446+
if (!fdev) {
447+
pr_err("Failed to create psci-cpuidle device\n");
448+
return -ENODEV;
457449
}
458450

459451
return 0;

drivers/cpuidle/governors/menu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
255255
*/
256256
data->next_timer_ns = KTIME_MAX;
257257
delta_tick = TICK_NSEC / 2;
258-
data->bucket = which_bucket(KTIME_MAX);
258+
data->bucket = BUCKETS - 1;
259259
}
260260

261261
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||

drivers/cpuidle/governors/teo.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
*
2020
* Of course, non-timer wakeup sources are more important in some use cases,
2121
* but even then it is generally unnecessary to consider idle duration values
22-
* greater than the time time till the next timer event, referred as the sleep
22+
* greater than the time till the next timer event, referred to as the sleep
2323
* length in what follows, because the closest timer will ultimately wake up the
2424
* CPU anyway unless it is woken up earlier.
2525
*
@@ -311,7 +311,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
311311
struct cpuidle_state *s = &drv->states[i];
312312

313313
/*
314-
* Update the sums of idle state mertics for all of the states
314+
* Update the sums of idle state metrics for all of the states
315315
* shallower than the current one.
316316
*/
317317
intercept_sum += prev_bin->intercepts;

drivers/idle/intel_idle.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@
4848
#include <trace/events/power.h>
4949
#include <linux/sched.h>
5050
#include <linux/sched/smt.h>
51+
#include <linux/mutex.h>
5152
#include <linux/notifier.h>
5253
#include <linux/cpu.h>
5354
#include <linux/moduleparam.h>
55+
#include <linux/sysfs.h>
5456
#include <asm/cpuid.h>
5557
#include <asm/cpu_device_id.h>
5658
#include <asm/intel-family.h>
@@ -92,9 +94,15 @@ struct idle_cpu {
9294
*/
9395
unsigned long auto_demotion_disable_flags;
9496
bool disable_promotion_to_c1e;
97+
bool c1_demotion_supported;
9598
bool use_acpi;
9699
};
97100

101+
static bool c1_demotion_supported;
102+
static DEFINE_MUTEX(c1_demotion_mutex);
103+
104+
static struct device *sysfs_root __initdata;
105+
98106
static const struct idle_cpu *icpu __initdata;
99107
static struct cpuidle_state *cpuidle_state_table __initdata;
100108

@@ -1549,18 +1557,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = {
15491557
/*
 * idle_cpu descriptor that uses the spr_cstates table. Setting
 * c1_demotion_supported makes intel_idle_init() enable the
 * intel_c1_demotion sysfs knob for this platform.
 */
static const struct idle_cpu idle_cpu_spr __initconst = {
15501558
.state_table = spr_cstates,
15511559
.disable_promotion_to_c1e = true,
1560+
.c1_demotion_supported = true,
15521561
.use_acpi = true,
15531562
};
15541563

15551564
/*
 * idle_cpu descriptor that uses the gnr_cstates table. Setting
 * c1_demotion_supported makes intel_idle_init() enable the
 * intel_c1_demotion sysfs knob for this platform.
 */
static const struct idle_cpu idle_cpu_gnr __initconst = {
15561565
.state_table = gnr_cstates,
15571566
.disable_promotion_to_c1e = true,
1567+
.c1_demotion_supported = true,
15581568
.use_acpi = true,
15591569
};
15601570

15611571
/*
 * idle_cpu descriptor that uses the gnrd_cstates table. Setting
 * c1_demotion_supported makes intel_idle_init() enable the
 * intel_c1_demotion sysfs knob for this platform.
 */
static const struct idle_cpu idle_cpu_gnrd __initconst = {
15621572
.state_table = gnrd_cstates,
15631573
.disable_promotion_to_c1e = true,
1574+
.c1_demotion_supported = true,
15641575
.use_acpi = true,
15651576
};
15661577

@@ -1599,12 +1610,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = {
15991610
/*
 * idle_cpu descriptor that uses the grr_cstates table. Setting
 * c1_demotion_supported makes intel_idle_init() enable the
 * intel_c1_demotion sysfs knob for this platform.
 */
static const struct idle_cpu idle_cpu_grr __initconst = {
16001611
.state_table = grr_cstates,
16011612
.disable_promotion_to_c1e = true,
1613+
.c1_demotion_supported = true,
16021614
.use_acpi = true,
16031615
};
16041616

16051617
/*
 * idle_cpu descriptor that uses the srf_cstates table. Setting
 * c1_demotion_supported makes intel_idle_init() enable the
 * intel_c1_demotion sysfs knob for this platform.
 */
static const struct idle_cpu idle_cpu_srf __initconst = {
16061618
.state_table = srf_cstates,
16071619
.disable_promotion_to_c1e = true,
1620+
.c1_demotion_supported = true,
16081621
.use_acpi = true,
16091622
};
16101623

@@ -2324,6 +2337,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
23242337
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
23252338
}
23262339

2340+
static void intel_c1_demotion_toggle(void *enable)
2341+
{
2342+
unsigned long long msr_val;
2343+
2344+
rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2345+
/*
2346+
* Enable/disable C1 undemotion along with C1 demotion, as this is the
2347+
* most sensible configuration in general.
2348+
*/
2349+
if (enable)
2350+
msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE;
2351+
else
2352+
msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE);
2353+
wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2354+
}
2355+
2356+
/*
 * sysfs write handler for the intel_c1_demotion attribute.
 *
 * Parses @buf as a boolean and applies the requested setting on every CPU.
 * Returns @count on success, or the kstrtobool() error code when the input
 * cannot be parsed.
 */
static ssize_t intel_c1_demotion_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	bool on;
	int ret = kstrtobool(buf, &on);

	if (ret)
		return ret;

	/*
	 * Hold the mutex across the cross-CPU call. The boolean travels to
	 * the callback packed into the pointer argument, and the final '1'
	 * makes on_each_cpu() wait until all CPUs have finished.
	 */
	mutex_lock(&c1_demotion_mutex);
	on_each_cpu(intel_c1_demotion_toggle, (void *)on, 1);
	mutex_unlock(&c1_demotion_mutex);

	return count;
}
2374+
2375+
static ssize_t intel_c1_demotion_show(struct device *dev,
2376+
struct device_attribute *attr, char *buf)
2377+
{
2378+
unsigned long long msr_val;
2379+
2380+
/*
2381+
* Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2382+
* configuration would be a BIOS bug.
2383+
*/
2384+
rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_val);
2385+
return sysfs_emit(buf, "%d\n", !!(msr_val & NHM_C1_AUTO_DEMOTE));
2386+
}
2387+
static DEVICE_ATTR_RW(intel_c1_demotion);
2388+
2389+
/*
 * Add the intel_c1_demotion attribute to the "cpuidle" group of the CPU
 * subsystem root device.
 *
 * Does nothing and returns 0 when the platform did not set
 * c1_demotion_supported, or when the CPU subsystem has no root device.
 * On success the reference on the root device is kept in sysfs_root so
 * that intel_idle_sysfs_uninit() can remove the file and drop it.
 *
 * Returns 0 on success or the sysfs_add_file_to_group() error code.
 */
static int __init intel_idle_sysfs_init(void)
{
	int err;

	if (!c1_demotion_supported)
		return 0;

	sysfs_root = bus_get_dev_root(&cpu_subsys);
	if (!sysfs_root)
		return 0;

	err = sysfs_add_file_to_group(&sysfs_root->kobj,
				      &dev_attr_intel_c1_demotion.attr,
				      "cpuidle");
	if (err) {
		put_device(sysfs_root);
		/*
		 * The caller only warns on failure and keeps initializing.
		 * If a later step fails, intel_idle_sysfs_uninit() runs and
		 * would drop the reference a second time through a stale
		 * pointer, so clear it here.
		 */
		sysfs_root = NULL;
		return err;
	}

	return 0;
}
2410+
2411+
/*
 * Undo intel_idle_sysfs_init(): remove the intel_c1_demotion attribute
 * from the "cpuidle" group and drop the reference held in sysfs_root.
 * Does nothing when sysfs_root is NULL.
 */
static void __init intel_idle_sysfs_uninit(void)
{
	if (sysfs_root) {
		sysfs_remove_file_from_group(&sysfs_root->kobj,
					     &dev_attr_intel_c1_demotion.attr,
					     "cpuidle");
		put_device(sysfs_root);
	}
}
2421+
23272422
static int __init intel_idle_init(void)
23282423
{
23292424
const struct x86_cpu_id *id;
@@ -2374,6 +2469,8 @@ static int __init intel_idle_init(void)
23742469
auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
23752470
if (icpu->disable_promotion_to_c1e)
23762471
c1e_promotion = C1E_PROMOTION_DISABLE;
2472+
if (icpu->c1_demotion_supported)
2473+
c1_demotion_supported = true;
23772474
if (icpu->use_acpi || force_use_acpi)
23782475
intel_idle_acpi_cst_extract();
23792476
} else if (!intel_idle_acpi_cst_extract()) {
@@ -2387,6 +2484,10 @@ static int __init intel_idle_init(void)
23872484
if (!intel_idle_cpuidle_devices)
23882485
return -ENOMEM;
23892486

2487+
retval = intel_idle_sysfs_init();
2488+
if (retval)
2489+
pr_warn("failed to initialized sysfs");
2490+
23902491
intel_idle_cpuidle_driver_init(&intel_idle_driver);
23912492

23922493
retval = cpuidle_register_driver(&intel_idle_driver);
@@ -2411,6 +2512,7 @@ static int __init intel_idle_init(void)
24112512
intel_idle_cpuidle_devices_uninit();
24122513
cpuidle_unregister_driver(&intel_idle_driver);
24132514
init_driver_fail:
2515+
intel_idle_sysfs_uninit();
24142516
free_percpu(intel_idle_cpuidle_devices);
24152517
return retval;
24162518

0 commit comments

Comments
 (0)