/* $NetBSD: acpi_cpu_md.c,v 1.77 2014/04/17 16:01:24 christos Exp $ */

/*-
 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.77 2014/04/17 16:01:24 christos Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpufreq.h>
#include <sys/device.h>
#include <sys/kcore.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <x86/cpu.h>
#include <x86/cpufunc.h>
#include <x86/cputypes.h>
#include <x86/cpuvar.h>
#include <x86/cpu_msr.h>
#include <x86/machdep.h>
#include <x86/x86/tsc.h>

#include <dev/acpi/acpica.h>
#include <dev/acpi/acpi_cpu.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <machine/acpi_machdep.h>
/*
 * Intel IA32_MISC_ENABLE.
 */
#define MSR_MISC_ENABLE_EST	__BIT(16)
#define MSR_MISC_ENABLE_TURBO	__BIT(38)

/*
 * AMD C1E.
 */
#define MSR_CMPHALT		0xc0010055

#define MSR_CMPHALT_SMI		__BIT(27)
#define MSR_CMPHALT_C1E		__BIT(28)
#define MSR_CMPHALT_BMSTS	__BIT(29)

/*
 * AMD families 10h, 11h, 12h, 14h, and 15h.
 */
#define MSR_10H_LIMIT		0xc0010061
#define MSR_10H_CONTROL		0xc0010062
#define MSR_10H_STATUS		0xc0010063
#define MSR_10H_CONFIG		0xc0010064

/*
 * AMD family 0Fh.
 */
#define MSR_0FH_CONTROL		0xc0010041
#define MSR_0FH_STATUS		0xc0010042

#define MSR_0FH_STATUS_CFID	__BITS( 0, 5)
#define MSR_0FH_STATUS_CVID	__BITS(32, 36)
#define MSR_0FH_STATUS_PENDING	__BITS(31, 31)

#define MSR_0FH_CONTROL_FID	__BITS( 0, 5)
#define MSR_0FH_CONTROL_VID	__BITS( 8, 12)
#define MSR_0FH_CONTROL_CHG	__BITS(16, 16)
#define MSR_0FH_CONTROL_CNT	__BITS(32, 51)

#define ACPI_0FH_STATUS_FID	__BITS( 0, 5)
#define ACPI_0FH_STATUS_VID	__BITS( 6, 10)

#define ACPI_0FH_CONTROL_FID	__BITS( 0, 5)
#define ACPI_0FH_CONTROL_VID	__BITS( 6, 10)
#define ACPI_0FH_CONTROL_VST	__BITS(11, 17)
#define ACPI_0FH_CONTROL_MVS	__BITS(18, 19)
#define ACPI_0FH_CONTROL_PLL	__BITS(20, 26)
#define ACPI_0FH_CONTROL_RVO	__BITS(28, 29)
#define ACPI_0FH_CONTROL_IRT	__BITS(30, 31)

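/*
 * Convert a frequency ID (FID) to the corresponding VCO frequency ID:
 * FIDs below 8 map to 8 + 2 * FID, larger FIDs are used as such
 * (cf. the BKDG for AMD family 0Fh processors).
 */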
#define FID_TO_VCO_FID(fid)	(((fid) < 8) ? (8 + ((fid) << 1)) : (fid))

static char	  native_idle_text[16];
void		(*native_idle)(void) = NULL;

static int	 acpicpu_md_quirk_piix4(const struct pci_attach_args *);
static void	 acpicpu_md_pstate_hwf_reset(void *, void *);
static int	 acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *,
		     uint32_t *);
static int	 acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *);
static int	 acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *);
static void	 acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t,
		     uint32_t, uint32_t);
static int	 acpicpu_md_pstate_sysctl_init(void);
static int	 acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO);
static int	 acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO);

extern struct acpicpu_softc **acpicpu_sc;
static struct sysctllog *acpicpu_log = NULL;
struct cpu_info *
acpicpu_md_match(device_t parent, cfdata_t match, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	if (strcmp(cfaa->name, "frequency") != 0)
		return NULL;

	return cfaa->ci;
}

struct cpu_info *
acpicpu_md_attach(device_t parent, device_t self, void *aux)
{
	struct cpufeature_attach_args *cfaa = aux;

	return cfaa->ci;
}

uint32_t
acpicpu_md_flags(void)
{
	struct cpu_info *ci = curcpu();
	struct pci_attach_args pa;
	uint32_t family, val = 0;
	uint32_t regs[4];
	uint64_t msr;

	if (acpi_md_ncpus() == 1)
		val |= ACPICPU_FLAG_C_BM;

	if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0)
		val |= ACPICPU_FLAG_C_FFH;

	/*
	 * By default, assume that the local APIC timer
	 * as well as TSC are stalled during C3 sleep.
	 */
	val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC;

	/*
	 * Detect whether TSC is invariant. If it is not, we keep the flag to
	 * note that TSC will not run at constant rate. Depending on the CPU,
	 * this may affect P- and T-state changes, but especially relevant
	 * are C-states; with variant TSC, states larger than C1 may
	 * completely stop the counter.
	 */
	if (tsc_is_invariant())
		val &= ~ACPICPU_FLAG_C_TSC;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:

		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		break;

	case CPUVENDOR_INTEL:

		/*
		 * Bus master control and arbitration should be
		 * available on all supported Intel CPUs (to be
		 * sure, this is double-checked later from the
		 * firmware data). These flags imply that it is
		 * not necessary to flush caches before C3 state.
		 */
		val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB;

		/*
		 * Check if we can use "native", MSR-based,
		 * access. If not, we have to resort to I/O.
		 */
		if ((ci->ci_feat_val[1] & CPUID2_EST) != 0)
			val |= ACPICPU_FLAG_P_FFH;

		if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0)
			val |= ACPICPU_FLAG_T_FFH;

		/*
		 * Check whether MSR_APERF, MSR_MPERF, and Turbo
		 * Boost are available. Also see if we might have
		 * an invariant local APIC timer ("ARAT").
		 */
		if (cpuid_level >= 0x06) {

			x86_cpuid(0x00000006, regs);

			if ((regs[2] & CPUID_DSPM_HWF) != 0)
				val |= ACPICPU_FLAG_P_HWF;

			if ((regs[0] & CPUID_DSPM_IDA) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			if ((regs[0] & CPUID_DSPM_ARAT) != 0)
				val &= ~ACPICPU_FLAG_C_APIC;
		}

		break;

	case CPUVENDOR_AMD:

		x86_cpuid(0x80000000, regs);

		if (regs[0] < 0x80000007)
			break;

		x86_cpuid(0x80000007, regs);

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/*
			 * Evaluate support for the "FID/VID
			 * algorithm" also used by powernow(4).
			 */
			if ((regs[3] & CPUID_APM_FID) == 0)
				break;

			if ((regs[3] & CPUID_APM_VID) == 0)
				break;

			val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID;
			break;

		case 0x10:
		case 0x11:

			/*
			 * Disable C1E if present.
			 */
			if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT)
				val |= ACPICPU_FLAG_C_C1E;

			/* FALLTHROUGH */

		case 0x12:
		case 0x14: /* AMD Fusion */
		case 0x15: /* AMD Bulldozer */

			/*
			 * Like with Intel, detect MSR-based P-states,
			 * and AMD's "turbo" (Core Performance Boost),
			 * respectively.
			 */
			if ((regs[3] & CPUID_APM_HWP) != 0)
				val |= ACPICPU_FLAG_P_FFH;

			if ((regs[3] & CPUID_APM_CPB) != 0)
				val |= ACPICPU_FLAG_P_TURBO;

			/*
			 * Also check for APERF and MPERF,
			 * first available in the family 10h.
			 */
			if (cpuid_level >= 0x06) {

				x86_cpuid(0x00000006, regs);

				if ((regs[2] & CPUID_DSPM_HWF) != 0)
					val |= ACPICPU_FLAG_P_HWF;
			}

			break;
		}

		break;
	}

	/*
	 * There are several errata for PIIX4.
	 */
	if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0)
		val |= ACPICPU_FLAG_PIIX4;

	return val;
}

static int
acpicpu_md_quirk_piix4(const struct pci_attach_args *pa)
{

	/*
	 * XXX: The pci_find_device(9) function only
	 *	deals with attached devices. Change this
	 *	to use something like pci_device_foreach().
	 */
	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL)
		return 0;

	if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA ||
	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC)
		return 1;

	return 0;
}

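/*
 * Disable AMD C1E: clear the SMI and C1E trigger bits in
 * MSR_CMPHALT so that HLT enters a plain C1 state instead of
 * being promoted to C1E behind the back of the OS.
 */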
void
acpicpu_md_quirk_c1e(void)
{
	const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E;
	uint64_t val;

	val = rdmsr(MSR_CMPHALT);

	if ((val & c1e) != 0)
		wrmsr(MSR_CMPHALT, val & ~c1e);
}

int
acpicpu_md_cstate_start(struct acpicpu_softc *sc)
{
	const size_t size = sizeof(native_idle_text);
	struct acpicpu_cstate *cs;
	bool ipi = false;
	int i;

	/*
	 * Save the cpu_idle(9) loop used by default.
	 */
	x86_cpu_idle_get(&native_idle, native_idle_text, size);

	for (i = 0; i < ACPI_C_STATE_COUNT; i++) {

		cs = &sc->sc_cstate[i];

		if (cs->cs_method == ACPICPU_C_STATE_HALT) {
			ipi = true;
			break;
		}
	}

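	/*
	 * If at least one state is entered with HLT, the idle
	 * loop must be woken with an IPI: a halted CPU resumes
	 * execution only when it receives an interrupt.
	 */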
	x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi);

	return 0;
}

int
acpicpu_md_cstate_stop(void)
{
	static char text[16];
	void (*func)(void);
	uint64_t xc;
	bool ipi;

	x86_cpu_idle_get(&func, text, sizeof(text));

	if (func == native_idle)
		return EALREADY;

	ipi = (native_idle == x86_cpu_idle_halt);
	x86_cpu_idle_set(native_idle, native_idle_text, ipi);

	/*
	 * Run a cross-call to ensure that all CPUs are
	 * out from the ACPI idle-loop before detachment.
	 */
	xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(xc);

	return 0;
}

/*
 * Called with interrupts enabled.
 */
void
acpicpu_md_cstate_enter(int method, int state)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	switch (method) {

	case ACPICPU_C_STATE_FFH:

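		/*
		 * Use MONITOR/MWAIT: arm the monitor on this CPU's
		 * ci_want_resched so that a wakeup store breaks the
		 * wait, and encode the target C-state in bits 7:4
		 * of the MWAIT hint, i.e. (state - 1) << 4.
		 */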
		x86_monitor(&ci->ci_want_resched, 0, 0);

		if (__predict_false(ci->ci_want_resched != 0))
			return;

		x86_mwait((state - 1) << 4, 0);
		break;

	case ACPICPU_C_STATE_HALT:

		x86_disable_intr();

		if (__predict_false(ci->ci_want_resched != 0)) {
			x86_enable_intr();
			return;
		}

		x86_stihlt();
		break;
	}
}

int
acpicpu_md_pstate_start(struct acpicpu_softc *sc)
{
	uint64_t xc, val;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * Make sure EST is enabled.
		 */
		if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) {

			val = rdmsr(MSR_MISC_ENABLE);

			if ((val & MSR_MISC_ENABLE_EST) == 0) {

				val |= MSR_MISC_ENABLE_EST;
				wrmsr(MSR_MISC_ENABLE, val);
				val = rdmsr(MSR_MISC_ENABLE);

				if ((val & MSR_MISC_ENABLE_EST) == 0)
					return ENOTTY;
			}
		}
	}

	/*
	 * Reset the APERF and MPERF counters.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {
		xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL);
		xc_wait(xc);
	}

	return acpicpu_md_pstate_sysctl_init();
}

int
acpicpu_md_pstate_stop(void)
{

	if (acpicpu_log == NULL)
		return EALREADY;

	sysctl_teardown(&acpicpu_log);
	acpicpu_log = NULL;

	return 0;
}

int
acpicpu_md_pstate_init(struct acpicpu_softc *sc)
{
	struct cpu_info *ci = sc->sc_ci;
	struct acpicpu_pstate *ps, msr;
	uint32_t family, i = 0;

	(void)memset(&msr, 0, sizeof(struct acpicpu_pstate));

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:

		/*
		 * If the so-called Turbo Boost is present,
		 * the P0-state is always the "turbo state".
		 * It is shown as the P1 frequency + 1 MHz.
		 *
		 * For discussion, see:
		 *
		 *	Intel Corporation: Intel Turbo Boost Technology
		 *	in Intel Core(tm) Microarchitectures (Nehalem)
		 *	Based Processors. White Paper, November 2008.
		 */
		if (sc->sc_pstate_count >= 2 &&
		   (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) {

			ps = &sc->sc_pstate[0];

			if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1)
				ps->ps_flags |= ACPICPU_FLAG_P_TURBO;
		}

		msr.ps_control_addr = MSR_PERF_CTL;
		msr.ps_control_mask = __BITS(0, 15);

		msr.ps_status_addr = MSR_PERF_STATUS;
		msr.ps_status_mask = __BITS(0, 15);
		break;

	case CPUVENDOR_AMD:

		if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
			msr.ps_flags |= ACPICPU_FLAG_P_FIDVID;

		family = CPUID_TO_FAMILY(ci->ci_signature);

		switch (family) {

		case 0x0f:
			msr.ps_control_addr = MSR_0FH_CONTROL;
			msr.ps_status_addr = MSR_0FH_STATUS;
			break;

		case 0x10:
		case 0x11:
		case 0x12:
		case 0x14:
		case 0x15:
			msr.ps_control_addr = MSR_10H_CONTROL;
			msr.ps_control_mask = __BITS(0, 2);

			msr.ps_status_addr = MSR_10H_STATUS;
			msr.ps_status_mask = __BITS(0, 2);
			break;

		default:
			/*
			 * If we have an unknown AMD CPU, rely on XPSS.
			 */
			if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0)
				return EOPNOTSUPP;
		}

		break;

	default:
		return ENODEV;
	}

	/*
	 * Fill the P-state structures with MSR addresses that are
	 * known to be correct. If we do not know the addresses,
	 * leave the values intact. If a vendor uses XPSS, we do
	 * not necessarily need to do anything to support new CPUs.
	 */
	while (i < sc->sc_pstate_count) {

		ps = &sc->sc_pstate[i];

		if (msr.ps_flags != 0)
			ps->ps_flags |= msr.ps_flags;

		if (msr.ps_status_addr != 0)
			ps->ps_status_addr = msr.ps_status_addr;

		if (msr.ps_status_mask != 0)
			ps->ps_status_mask = msr.ps_status_mask;

		if (msr.ps_control_addr != 0)
			ps->ps_control_addr = msr.ps_control_addr;

		if (msr.ps_control_mask != 0)
			ps->ps_control_mask = msr.ps_control_mask;

		i++;
	}

	return 0;
}

/*
 * Read the IA32_APERF and IA32_MPERF counters. The latter
 * increments at the rate of the fixed maximum frequency
 * configured during the boot, whereas APERF counts at the
 * rate of the actual frequency. Note that the MSRs must be
 * read without delay, and that only the ratio between
 * IA32_APERF and IA32_MPERF is architecturally defined.
 *
 * The function thus returns the percentage of the actual
 * frequency in terms of the maximum frequency of the calling
 * CPU since the last call. A value zero implies an error.
 *
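 * For example, if APERF advanced by 160 units while MPERF
 * advanced by 200 units since the previous call, the CPU ran
 * at (160 * 100) / 200 = 80 % of its maximum frequency.
 *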
 * For further details, refer to:
 *
 *	Intel Corporation: Intel 64 and IA-32 Architectures
 *	Software Developer's Manual. Section 13.2, Volume 3A:
 *	System Programming Guide, Part 1. July, 2008.
 *
 *	Advanced Micro Devices: BIOS and Kernel Developer's
 *	Guide (BKDG) for AMD Family 10h Processors. Section
 *	2.4.5, Revision 3.48, April 2010.
 */
uint8_t
acpicpu_md_pstate_hwf(struct cpu_info *ci)
{
	struct acpicpu_softc *sc;
	uint64_t aperf, mperf;
	uint8_t rv = 0;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return 0;

	if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0))
		return 0;

	aperf = sc->sc_pstate_aperf;
	mperf = sc->sc_pstate_mperf;

	x86_disable_intr();

	sc->sc_pstate_aperf = rdmsr(MSR_APERF);
	sc->sc_pstate_mperf = rdmsr(MSR_MPERF);

	x86_enable_intr();

	aperf = sc->sc_pstate_aperf - aperf;
	mperf = sc->sc_pstate_mperf - mperf;

	if (__predict_true(mperf != 0))
		rv = (aperf * 100) / mperf;

	return rv;
}

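/*
 * Reset the APERF and MPERF counters on the calling CPU.
 * Run on every CPU via a cross-call; see xc_broadcast(9).
 */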
static void
acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (__predict_false(sc == NULL))
		return;

	x86_disable_intr();

	wrmsr(MSR_APERF, 0);
	wrmsr(MSR_MPERF, 0);

	x86_enable_intr();

	sc->sc_pstate_aperf = 0;
	sc->sc_pstate_mperf = 0;
}

int
acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps = NULL;
	uint64_t val;
	uint32_t i;

	if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_get(sc, freq);

	/*
	 * Pick any P-state for the status address.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_true(ps->ps_freq != 0))
			break;
	}

	if (__predict_false(ps == NULL))
		return ENODEV;

	if (__predict_false(ps->ps_status_addr == 0))
		return EINVAL;

	val = rdmsr(ps->ps_status_addr);

	if (__predict_true(ps->ps_status_mask != 0))
		val = val & ps->ps_status_mask;

	/*
	 * Search for the value from known P-states.
	 */
	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		if (val == ps->ps_status) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	/*
	 * If the value was not found, try APERF/MPERF.
	 * The state is P0 if the return value is 100 %.
	 */
	if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) {

		KASSERT(sc->sc_pstate_count > 0);
		KASSERT(sc->sc_pstate[0].ps_freq != 0);

		if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) {
			*freq = sc->sc_pstate[0].ps_freq;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_pstate_set(struct acpicpu_pstate *ps)
{
	uint64_t val = 0;

	if (__predict_false(ps->ps_control_addr == 0))
		return EINVAL;

	if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0)
		return acpicpu_md_pstate_fidvid_set(ps);

	/*
	 * If the mask is set, do a read-modify-write.
	 */
	if (__predict_true(ps->ps_control_mask != 0)) {
		val = rdmsr(ps->ps_control_addr);
		val &= ~ps->ps_control_mask;
	}

	val |= ps->ps_control;

	wrmsr(ps->ps_control_addr, val);
	DELAY(ps->ps_latency);

	return 0;
}

static int
acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq)
{
	struct acpicpu_pstate *ps;
	uint32_t fid, i, vid;
	uint32_t cfid, cvid;
	int rv;

	/*
	 * AMD family 0Fh needs special treatment.
	 * While it wants to use ACPI, it does not
	 * comply with the ACPI specifications.
	 */
	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	for (i = 0; i < sc->sc_pstate_count; i++) {

		ps = &sc->sc_pstate[i];

		if (__predict_false(ps->ps_freq == 0))
			continue;

		fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID);
		vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID);

		if (cfid == fid && cvid == vid) {
			*freq = ps->ps_freq;
			return 0;
		}
	}

	return EIO;
}

static int
acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps)
{
	const uint64_t ctrl = ps->ps_control;
	uint32_t cfid, cvid, fid, i, irt;
	uint32_t pll, vco_cfid, vco_fid;
	uint32_t val, vid, vst;
	int rv;

	rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid);

	if (rv != 0)
		return rv;

	fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID);
	vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID);
	irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT);
	vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST);
	pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL);

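	/*
	 * Convert the voltage stabilization (VST), PLL lock,
	 * and isochronous relief (IRT) times to the microsecond
	 * values expected by DELAY(9).
	 */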
	vst = vst * 20;
	pll = pll * 1000 / 5;
	irt = 10 * __BIT(irt);

	/*
	 * Phase 1: step the current voltage (VID) towards the
	 * target in maximum voltage step (MVS) increments, and
	 * apply any additional ramp voltage offset (RVO) steps.
	 */
	while (cvid > vid) {

		val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS);
		val = (val > cvid) ? 0 : cvid - val;

		acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO);

	for (; i > 0 && cvid > 0; --i) {

		acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 2: step the current frequency (FID) towards the
	 * target, keeping the VCO frequency IDs within two steps
	 * of each other during the transition.
	 */
	if (cfid != fid) {

		vco_fid = FID_TO_VCO_FID(fid);
		vco_cfid = FID_TO_VCO_FID(cfid);

		while (abs(vco_fid - vco_cfid) > 2) {

			if (fid <= cfid)
				val = cfid - 2;
			else {
				val = (cfid > 6) ? cfid + 2 :
				    FID_TO_VCO_FID(cfid) + 2;
			}

			acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt);
			rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

			if (rv != 0)
				return rv;

			vco_cfid = FID_TO_VCO_FID(cfid);
		}

		acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt);
		rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL);

		if (rv != 0)
			return rv;
	}

	/*
	 * Phase 3: set the final voltage for the new frequency.
	 */
	if (cvid != vid) {

		acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst);
		rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid);

		if (rv != 0)
			return rv;
	}

	return 0;
}

static int
acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid)
{
	int i = ACPICPU_P_STATE_RETRY * 100;
	uint64_t val;

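	/*
	 * Poll the status register until the pending bit for a
	 * FID/VID change clears or the retries are exhausted.
	 */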
	do {
		val = rdmsr(MSR_0FH_STATUS);

	} while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0);

	if (i < 0)
		return EAGAIN;

	if (cfid != NULL)
		*cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID);

	if (cvid != NULL)
		*cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID);

	return 0;
}

static void
acpicpu_md_pstate_fidvid_write(uint32_t fid,
    uint32_t vid, uint32_t cnt, uint32_t tmo)
{
	uint64_t val = 0;

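	/*
	 * Encode the target FID and VID, set the change bit
	 * to initiate the transition, and wait the caller-
	 * supplied settling time (in microseconds).
	 */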
	val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID);
	val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID);
	val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT);
	val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG);

	wrmsr(MSR_0FH_CONTROL, val);
	DELAY(tmo);
}

int
acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent)
{
	struct acpicpu_tstate *ts;
	uint64_t val;
	uint32_t i;

	val = rdmsr(MSR_THERM_CONTROL);

	for (i = 0; i < sc->sc_tstate_count; i++) {

		ts = &sc->sc_tstate[i];

		if (ts->ts_percent == 0)
			continue;

		if (val == ts->ts_status) {
			*percent = ts->ts_percent;
			return 0;
		}
	}

	return EIO;
}

int
acpicpu_md_tstate_set(struct acpicpu_tstate *ts)
{
	uint64_t val;
	uint8_t i;

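	/*
	 * Only the clock modulation fields, bits 0 to 4 of
	 * MSR_THERM_CONTROL, are written; mask out the rest.
	 */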
	val = ts->ts_control;
	val = val & __BITS(0, 4);

	wrmsr(MSR_THERM_CONTROL, val);

	if (ts->ts_status == 0) {
		DELAY(ts->ts_latency);
		return 0;
	}

	for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) {

		val = rdmsr(MSR_THERM_CONTROL);

		if (val == ts->ts_status)
			return 0;

		DELAY(ts->ts_latency);
	}

	return EAGAIN;
}

/*
 * A kludge for backwards compatibility.
 */
static int
acpicpu_md_pstate_sysctl_init(void)
{
	const struct sysctlnode *fnode, *mnode, *rnode;
	const char *str;
	int rv;

	switch (cpu_vendor) {

	case CPUVENDOR_IDT:
	case CPUVENDOR_INTEL:
		str = "est";
		break;

	case CPUVENDOR_AMD:
		str = "powernow";
		break;

	default:
		return ENODEV;
	}

	rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode,
	    0, CTLTYPE_NODE, str, NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode,
	    0, CTLTYPE_NODE, "frequency", NULL,
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL,
	    acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL,
	    acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode,
	    CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL,
	    acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL);

	if (rv != 0)
		goto fail;

	return 0;

fail:
	if (acpicpu_log != NULL) {
		sysctl_teardown(&acpicpu_log);
		acpicpu_log = NULL;
	}

	return rv;
}

static int
acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}

static int
acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	uint32_t freq;
	int err;

	freq = cpufreq_get(curcpu());

	if (freq == 0)
		return ENXIO;

	node = *rnode;
	node.sysctl_data = &freq;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	cpufreq_set_all(freq);

	return 0;
}

static int
acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS)
{
	struct cpu_info *ci = curcpu();
	struct acpicpu_softc *sc;
	struct sysctlnode node;
	char buf[1024];
	size_t len;
	uint32_t i;
	int err;

	sc = acpicpu_sc[ci->ci_acpiid];

	if (sc == NULL)
		return ENXIO;

	(void)memset(&buf, 0, sizeof(buf));

	mutex_enter(&sc->sc_mtx);

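	/*
	 * Build a space-separated list of the frequencies
	 * of the currently available P-states.
	 */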
	for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) {

		if (sc->sc_pstate[i].ps_freq == 0)
			continue;

		if (len >= sizeof(buf))
			break;

		len += snprintf(buf + len, sizeof(buf) - len, "%u%s",
		    sc->sc_pstate[i].ps_freq,
		    i < (sc->sc_pstate_count - 1) ? " " : "");
	}

	mutex_exit(&sc->sc_mtx);

	node = *rnode;
	node.sysctl_data = buf;

	err = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (err != 0 || newp == NULL)
		return err;

	return 0;
}