Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : #include <linux/kernel.h>
3 : #include <linux/pgtable.h>
4 :
5 : #include <linux/string.h>
6 : #include <linux/bitops.h>
7 : #include <linux/smp.h>
8 : #include <linux/sched.h>
9 : #include <linux/sched/clock.h>
10 : #include <linux/thread_info.h>
11 : #include <linux/init.h>
12 : #include <linux/uaccess.h>
13 :
14 : #include <asm/cpufeature.h>
15 : #include <asm/msr.h>
16 : #include <asm/bugs.h>
17 : #include <asm/cpu.h>
18 : #include <asm/intel-family.h>
19 : #include <asm/microcode_intel.h>
20 : #include <asm/hwcap2.h>
21 : #include <asm/elf.h>
22 : #include <asm/cpu_device_id.h>
23 : #include <asm/cmdline.h>
24 : #include <asm/traps.h>
25 : #include <asm/resctrl.h>
26 : #include <asm/numa.h>
27 : #include <asm/thermal.h>
28 :
29 : #ifdef CONFIG_X86_64
30 : #include <linux/topology.h>
31 : #endif
32 :
33 : #include "cpu.h"
34 :
35 : #ifdef CONFIG_X86_LOCAL_APIC
36 : #include <asm/mpspec.h>
37 : #include <asm/apic.h>
38 : #endif
39 :
40 : enum split_lock_detect_state {
41 : sld_off = 0,
42 : sld_warn,
43 : sld_fatal,
44 : };
45 :
46 : /*
47 : * Default to sld_off because most systems do not support split lock detection.
48 : * split_lock_setup() will switch this to sld_warn on systems that support
49 : * split lock detection, unless there is a command line override.
50 : */
51 : static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
52 : static u64 msr_test_ctrl_cache __ro_after_init;
53 :
54 : /*
55 : * With a name like MSR_TEST_CTL it should go without saying, but don't touch
56 : * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
57 : * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
58 : */
59 : static bool cpu_model_supports_sld __ro_after_init;
60 :
61 : /*
62 : * Processors which have self-snooping capability can handle conflicting
63 : * memory types across CPUs by snooping their own cache. However, there are
64 : * CPU models in which having conflicting memory types still leads to
65 : * unpredictable behavior, machine check errors, or hangs. Clear this
66 : * feature to prevent its use on machines with known errata.
67 : */
68 5 : static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
69 : {
70 5 : switch (c->x86_model) {
71 0 : case INTEL_FAM6_CORE_YONAH:
72 : case INTEL_FAM6_CORE2_MEROM:
73 : case INTEL_FAM6_CORE2_MEROM_L:
74 : case INTEL_FAM6_CORE2_PENRYN:
75 : case INTEL_FAM6_CORE2_DUNNINGTON:
76 : case INTEL_FAM6_NEHALEM:
77 : case INTEL_FAM6_NEHALEM_G:
78 : case INTEL_FAM6_NEHALEM_EP:
79 : case INTEL_FAM6_NEHALEM_EX:
80 : case INTEL_FAM6_WESTMERE:
81 : case INTEL_FAM6_WESTMERE_EP:
82 : case INTEL_FAM6_SANDYBRIDGE:
83 0 : setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
84 : }
85 5 : }
86 :
87 : static bool ring3mwait_disabled __read_mostly;
88 :
89 0 : static int __init ring3mwait_disable(char *__unused)
90 : {
91 0 : ring3mwait_disabled = true;
92 0 : return 0;
93 : }
94 : __setup("ring3mwait=disable", ring3mwait_disable);
95 :
96 4 : static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
97 : {
98 : /*
99 : * Ring 3 MONITOR/MWAIT feature cannot be detected without
100 : * cpu model and family comparison.
101 : */
102 4 : if (c->x86 != 6)
103 : return;
104 4 : switch (c->x86_model) {
105 : case INTEL_FAM6_XEON_PHI_KNL:
106 : case INTEL_FAM6_XEON_PHI_KNM:
107 0 : break;
108 : default:
109 : return;
110 : }
111 :
112 0 : if (ring3mwait_disabled)
113 : return;
114 :
115 0 : set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
116 0 : this_cpu_or(msr_misc_features_shadow,
117 : 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
118 :
119 0 : if (c == &boot_cpu_data)
120 0 : ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
121 : }
122 :
123 : /*
124 : * Early microcode releases for the Spectre v2 mitigation were broken.
125 : * Information taken from:
126 : * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
127 : * - https://kb.vmware.com/s/article/52345
128 : * - Microcode revisions observed in the wild
129 : * - Release note from 20180108 microcode release
130 : */
131 : struct sku_microcode {
132 : u8 model;
133 : u8 stepping;
134 : u32 microcode;
135 : };
136 : static const struct sku_microcode spectre_bad_microcodes[] = {
137 : { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 },
138 : { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 },
139 : { INTEL_FAM6_KABYLAKE, 0x09, 0x80 },
140 : { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 },
141 : { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 },
142 : { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
143 : { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
144 : { INTEL_FAM6_BROADWELL, 0x04, 0x28 },
145 : { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b },
146 : { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 },
147 : { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 },
148 : { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
149 : { INTEL_FAM6_HASWELL_L, 0x01, 0x21 },
150 : { INTEL_FAM6_HASWELL_G, 0x01, 0x18 },
151 : { INTEL_FAM6_HASWELL, 0x03, 0x23 },
152 : { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
153 : { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
154 : { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
155 : /* Observed in the wild */
156 : { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
157 : { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
158 : };
159 :
160 5 : static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
161 : {
162 5 : int i;
163 :
164 : /*
165 : * We know that hypervisors lie to us about the microcode version, so
166 : * we may as well hope that they are running the correct version.
167 : */
168 5 : if (cpu_has(c, X86_FEATURE_HYPERVISOR))
169 : return false;
170 :
171 0 : if (c->x86 != 6)
172 : return false;
173 :
174 0 : for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
175 0 : if (c->x86_model == spectre_bad_microcodes[i].model &&
176 0 : c->x86_stepping == spectre_bad_microcodes[i].stepping)
177 0 : return (c->microcode <= spectre_bad_microcodes[i].microcode);
178 : }
179 : return false;
180 : }
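/*
 * Editorial note: a CPU is flagged here only when its model and stepping
 * match an entry in spectre_bad_microcodes[] AND its running microcode
 * revision is less than or equal to the listed (known broken) revision;
 * later revisions are assumed to carry the fix.
 */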
181 :
182 5 : static void early_init_intel(struct cpuinfo_x86 *c)
183 : {
184 5 : u64 misc_enable;
185 :
186 : /* Unmask CPUID levels if masked: */
187 5 : if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
188 5 : if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
189 : MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
190 0 : c->cpuid_level = cpuid_eax(0);
191 0 : get_cpu_cap(c);
192 : }
193 : }
194 :
195 5 : if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
196 5 : (c->x86 == 0x6 && c->x86_model >= 0x0e))
197 5 : set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
198 :
199 5 : if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
200 5 : c->microcode = intel_get_microcode_revision();
201 :
202 : /* Now if any of them are set, check the blacklist and clear the lot */
203 5 : if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
204 0 : cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
205 0 : cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
206 5 : cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
207 0 : pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
208 0 : setup_clear_cpu_cap(X86_FEATURE_IBRS);
209 0 : setup_clear_cpu_cap(X86_FEATURE_IBPB);
210 0 : setup_clear_cpu_cap(X86_FEATURE_STIBP);
211 0 : setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
212 0 : setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
213 0 : setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
214 0 : setup_clear_cpu_cap(X86_FEATURE_SSBD);
215 0 : setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
216 : }
217 :
218 : /*
219 : * Atom erratum AAE44/AAF40/AAG38/AAH41:
220 : *
221 : * A race condition between speculative fetches and invalidating
222 : * a large page. This is worked around in microcode, but we
223 : * need the microcode to have already been loaded... so if it is
224 : * not, recommend a BIOS update and disable large pages.
225 : */
226 5 : if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
227 0 : c->microcode < 0x20e) {
228 0 : pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
229 0 : clear_cpu_cap(c, X86_FEATURE_PSE);
230 : }
231 :
232 : #ifdef CONFIG_X86_64
233 5 : set_cpu_cap(c, X86_FEATURE_SYSENTER32);
234 : #else
235 : /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
236 : if (c->x86 == 15 && c->x86_cache_alignment == 64)
237 : c->x86_cache_alignment = 128;
238 : #endif
239 :
240 : /* CPUID workaround for 0F33/0F34 CPU */
241 5 : if (c->x86 == 0xF && c->x86_model == 0x3
242 0 : && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
243 0 : c->x86_phys_bits = 36;
244 :
245 : /*
246 : * c->x86_power is CPUID leaf 8000_0007 EDX. Bit 8 means the TSC runs at a
247 : * constant rate across P/T states and does not stop in deep C-states.
248 : *
249 : * It is also reliable across cores and sockets (but not across
250 : * cabinets - we turn it off in that case explicitly).
251 : */
252 5 : if (c->x86_power & (1 << 8)) {
253 0 : set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
254 0 : set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
255 : }
256 :
257 : /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
258 5 : if (c->x86 == 6) {
259 5 : switch (c->x86_model) {
260 0 : case INTEL_FAM6_ATOM_SALTWELL_MID:
261 : case INTEL_FAM6_ATOM_SALTWELL_TABLET:
262 : case INTEL_FAM6_ATOM_SILVERMONT_MID:
263 : case INTEL_FAM6_ATOM_AIRMONT_NP:
264 0 : set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
265 0 : break;
266 : default:
267 : break;
268 : }
269 0 : }
270 :
271 : /*
272 : * There is a known erratum on Pentium III and Core Solo
273 : * and Core Duo CPUs.
274 : * " Page with PAT set to WC while associated MTRR is UC
275 : * may consolidate to UC "
276 : * Because of this erratum, it is better to stick with
277 : * setting WC in MTRR rather than using PAT on these CPUs.
278 : *
279 : * Enable PAT WC only on P4, Core 2 or later CPUs.
280 : */
281 5 : if (c->x86 == 6 && c->x86_model < 15)
282 0 : clear_cpu_cap(c, X86_FEATURE_PAT);
283 :
284 : /*
285 : * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
286 : * clear the fast string and enhanced fast string CPU capabilities.
287 : */
288 5 : if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
289 5 : rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
290 5 : if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
291 0 : pr_info("Disabled fast string operations\n");
292 0 : setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
293 0 : setup_clear_cpu_cap(X86_FEATURE_ERMS);
294 : }
295 : }
296 :
297 : /*
298 : * Intel Quark Core DevMan_001.pdf section 6.4.11
299 : * "The operating system also is required to invalidate (i.e., flush)
300 : * the TLB when any changes are made to any of the page table entries.
301 : * The operating system must reload CR3 to cause the TLB to be flushed"
302 : *
303 : * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
304 : * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
305 : * to be modified.
306 : */
307 5 : if (c->x86 == 5 && c->x86_model == 9) {
308 0 : pr_info("Disabling PGE capability bit\n");
309 0 : setup_clear_cpu_cap(X86_FEATURE_PGE);
310 : }
311 :
312 5 : if (c->cpuid_level >= 0x00000001) {
313 5 : u32 eax, ebx, ecx, edx;
314 :
315 5 : cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
316 : /*
317 : * If HTT (EDX[28]) is set EBX[16:23] contain the number of
318 : * apicids which are reserved per package. Store the resulting
319 : * shift value for the package management code.
320 : */
321 5 : if (edx & (1U << 28))
322 0 : c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
323 : }
324 :
325 5 : check_memory_type_self_snoop_errata(c);
326 :
327 : /*
328 : * Get the number of SMT siblings early from the extended topology
329 : * leaf, if available. Otherwise try the legacy SMT detection.
330 : */
331 5 : if (detect_extended_topology_early(c) < 0)
332 0 : detect_ht_early(c);
333 5 : }
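/*
 * Editorial sketch (not part of the kernel source): how the HTT handling
 * above turns CPUID.01H:EBX[23:16] (APIC IDs reserved per package) into
 * the x86_coreid_bits shift via get_count_order(). Example values are
 * hypothetical:
 *
 *   EBX[23:16] = 16  ->  get_count_order(16) = 4  ->  x86_coreid_bits = 4
 *   EBX[23:16] = 12  ->  get_count_order(12) = 4  (count rounded up to 16)
 */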
334 :
335 1 : static void bsp_init_intel(struct cpuinfo_x86 *c)
336 : {
337 1 : resctrl_cpu_detect(c);
338 1 : }
339 :
340 : #ifdef CONFIG_X86_32
341 : /*
342 : * Early probe support logic for ppro memory erratum #50
343 : *
344 : * This is called before we do cpu ident work
345 : */
346 :
347 : int ppro_with_ram_bug(void)
348 : {
349 : /* Uses data from early_cpu_detect now */
350 : if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
351 : boot_cpu_data.x86 == 6 &&
352 : boot_cpu_data.x86_model == 1 &&
353 : boot_cpu_data.x86_stepping < 8) {
354 : pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
355 : return 1;
356 : }
357 : return 0;
358 : }
359 :
360 : static void intel_smp_check(struct cpuinfo_x86 *c)
361 : {
362 : /* Is this call from identify_secondary_cpu()? */
363 : if (!c->cpu_index)
364 : return;
365 :
366 : /*
367 : * Mask B, Pentium, but not Pentium MMX
368 : */
369 : if (c->x86 == 5 &&
370 : c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
371 : c->x86_model <= 3) {
372 : /*
373 : * Remember we have B step Pentia with bugs
374 : */
375 : WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
376 : "with B stepping processors.\n");
377 : }
378 : }
379 :
380 : static int forcepae;
381 : static int __init forcepae_setup(char *__unused)
382 : {
383 : forcepae = 1;
384 : return 1;
385 : }
386 : __setup("forcepae", forcepae_setup);
387 :
388 : static void intel_workarounds(struct cpuinfo_x86 *c)
389 : {
390 : #ifdef CONFIG_X86_F00F_BUG
391 : /*
392 : * All models of Pentium and Pentium with MMX technology CPUs
393 : * have the F0 0F bug, which lets nonprivileged users lock up the
394 : * system. Announce that the fault handler will be checking for it.
395 : * The Quark is also family 5, but does not have the same bug.
396 : */
397 : clear_cpu_bug(c, X86_BUG_F00F);
398 : if (c->x86 == 5 && c->x86_model < 9) {
399 : static int f00f_workaround_enabled;
400 :
401 : set_cpu_bug(c, X86_BUG_F00F);
402 : if (!f00f_workaround_enabled) {
403 : pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
404 : f00f_workaround_enabled = 1;
405 : }
406 : }
407 : #endif
408 :
409 : /*
410 : * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
411 : * model 3 mask 3
412 : */
413 : if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
414 : clear_cpu_cap(c, X86_FEATURE_SEP);
415 :
416 : /*
417 : * PAE CPUID issue: many Pentium M report no PAE but may have a
418 : * functionally usable PAE implementation.
419 : * Forcefully enable PAE if kernel parameter "forcepae" is present.
420 : */
421 : if (forcepae) {
422 : pr_warn("PAE forced!\n");
423 : set_cpu_cap(c, X86_FEATURE_PAE);
424 : add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
425 : }
426 :
427 : /*
428 : * P4 Xeon erratum 037 workaround.
429 : * Hardware prefetcher may cause stale data to be loaded into the cache.
430 : */
431 : if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
432 : if (msr_set_bit(MSR_IA32_MISC_ENABLE,
433 : MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
434 : pr_info("CPU: C0 stepping P4 Xeon detected.\n");
435 : pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n");
436 : }
437 : }
438 :
439 : /*
440 : * See if we have a good local APIC by checking for buggy Pentia,
441 : * i.e. all B steppings and the C2 stepping of P54C when using their
442 : * integrated APIC (see 11AP erratum in "Pentium Processor
443 : * Specification Update").
444 : */
445 : if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
446 : (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
447 : set_cpu_bug(c, X86_BUG_11AP);
448 :
449 :
450 : #ifdef CONFIG_X86_INTEL_USERCOPY
451 : /*
452 : * Set up the preferred alignment for movsl bulk memory moves
453 : */
454 : switch (c->x86) {
455 : case 4: /* 486: untested */
456 : break;
457 : case 5: /* Old Pentia: untested */
458 : break;
459 : case 6: /* PII/PIII only like movsl with 8-byte alignment */
460 : movsl_mask.mask = 7;
461 : break;
462 : case 15: /* P4 is OK down to 8-byte alignment */
463 : movsl_mask.mask = 7;
464 : break;
465 : }
466 : #endif
467 :
468 : intel_smp_check(c);
469 : }
470 : #else
471 4 : static void intel_workarounds(struct cpuinfo_x86 *c)
472 : {
473 4 : }
474 : #endif
475 :
476 4 : static void srat_detect_node(struct cpuinfo_x86 *c)
477 : {
478 : #ifdef CONFIG_NUMA
479 4 : unsigned node;
480 4 : int cpu = smp_processor_id();
481 :
482 : /* Don't do the funky fallback heuristics the AMD version employs
483 : for now. */
484 4 : node = numa_cpu_node(cpu);
485 4 : if (node == NUMA_NO_NODE || !node_online(node)) {
486 : /* reuse the value from init_cpu_to_node() */
487 4 : node = cpu_to_node(cpu);
488 : }
489 4 : numa_set_node(cpu, node);
490 : #endif
491 4 : }
492 :
493 : #define MSR_IA32_TME_ACTIVATE 0x982
494 :
495 : /* Helpers to access TME_ACTIVATE MSR */
496 : #define TME_ACTIVATE_LOCKED(x) (x & 0x1)
497 : #define TME_ACTIVATE_ENABLED(x) (x & 0x2)
498 :
499 : #define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
500 : #define TME_ACTIVATE_POLICY_AES_XTS_128 0
501 :
502 : #define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
503 :
504 : #define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
505 : #define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
506 :
507 : /* Values for mktme_status (SW only construct) */
508 : #define MKTME_ENABLED 0
509 : #define MKTME_DISABLED 1
510 : #define MKTME_UNINITIALIZED 2
511 : static int mktme_status = MKTME_UNINITIALIZED;
512 :
513 0 : static void detect_tme(struct cpuinfo_x86 *c)
514 : {
515 0 : u64 tme_activate, tme_policy, tme_crypto_algs;
516 0 : int keyid_bits = 0, nr_keyids = 0;
517 0 : static u64 tme_activate_cpu0 = 0;
518 :
519 0 : rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
520 :
521 0 : if (mktme_status != MKTME_UNINITIALIZED) {
522 0 : if (tme_activate != tme_activate_cpu0) {
523 : /* Broken BIOS? */
524 0 : pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
525 0 : pr_err_once("x86/tme: MKTME is not usable\n");
526 0 : mktme_status = MKTME_DISABLED;
527 :
528 : /* Proceed. We may need to exclude bits from x86_phys_bits. */
529 : }
530 : } else {
531 0 : tme_activate_cpu0 = tme_activate;
532 : }
533 :
534 0 : if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
535 0 : pr_info_once("x86/tme: not enabled by BIOS\n");
536 0 : mktme_status = MKTME_DISABLED;
537 0 : return;
538 : }
539 :
540 0 : if (mktme_status != MKTME_UNINITIALIZED)
541 0 : goto detect_keyid_bits;
542 :
543 0 : pr_info("x86/tme: enabled by BIOS\n");
544 :
545 0 : tme_policy = TME_ACTIVATE_POLICY(tme_activate);
546 0 : if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
547 0 : pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
548 :
549 0 : tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
550 0 : if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
551 0 : pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
552 : tme_crypto_algs);
553 0 : mktme_status = MKTME_DISABLED;
554 : }
555 0 : detect_keyid_bits:
556 0 : keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
557 0 : nr_keyids = (1UL << keyid_bits) - 1;
558 0 : if (nr_keyids) {
559 0 : pr_info_once("x86/mktme: enabled by BIOS\n");
560 0 : pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
561 : } else {
562 0 : pr_info_once("x86/mktme: disabled by BIOS\n");
563 : }
564 :
565 0 : if (mktme_status == MKTME_UNINITIALIZED) {
566 : /* MKTME is usable */
567 0 : mktme_status = MKTME_ENABLED;
568 : }
569 :
570 : /*
571 : * KeyID bits effectively lower the number of physical address
572 : * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
573 : */
574 0 : c->x86_phys_bits -= keyid_bits;
575 : }
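/*
 * Editorial sketch (not part of the kernel source): decoding a hypothetical
 * MSR_IA32_TME_ACTIVATE value with the TME_ACTIVATE_* helpers above:
 *
 *   tme_activate = 0x0001000400000003
 *     TME_ACTIVATE_LOCKED()      -> bit 0 set: configuration locked
 *     TME_ACTIVATE_ENABLED()     -> bit 1 set: TME enabled by BIOS
 *     TME_ACTIVATE_POLICY()      -> bits 7:4 = 0: AES-XTS-128 policy
 *     TME_ACTIVATE_KEYID_BITS()  -> bits 35:32 = 4: nr_keyids = (1 << 4) - 1 = 15
 *     TME_ACTIVATE_CRYPTO_ALGS() -> bits 63:48 = 0x0001: AES-XTS-128 supported
 *
 * With keyid_bits = 4, c->x86_phys_bits is reduced by 4.
 */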
576 :
577 4 : static void init_cpuid_fault(struct cpuinfo_x86 *c)
578 : {
579 4 : u64 msr;
580 :
581 4 : if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
582 4 : if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
583 4 : set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
584 : }
585 4 : }
586 :
587 4 : static void init_intel_misc_features(struct cpuinfo_x86 *c)
588 : {
589 4 : u64 msr;
590 :
591 4 : if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
592 4 : return;
593 :
594 : /* Clear all MISC features */
595 4 : this_cpu_write(msr_misc_features_shadow, 0);
596 :
597 : /* Check features and update capabilities and shadow control bits */
598 4 : init_cpuid_fault(c);
599 4 : probe_xeon_phi_r3mwait(c);
600 :
601 4 : msr = this_cpu_read(msr_misc_features_shadow);
602 4 : wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
603 : }
604 :
605 : static void split_lock_init(void);
606 :
607 4 : static void init_intel(struct cpuinfo_x86 *c)
608 : {
609 4 : early_init_intel(c);
610 :
611 4 : intel_workarounds(c);
612 :
613 : /*
614 : * Detect the extended topology information if available. This
615 : * will reinitialise the initial_apicid which will be used
616 : * in init_intel_cacheinfo()
617 : */
618 4 : detect_extended_topology(c);
619 :
620 4 : if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
621 : /*
622 : * let's use the legacy cpuid vector 0x1 and 0x4 for topology
623 : * detection.
624 : */
625 0 : detect_num_cpu_cores(c);
626 : #ifdef CONFIG_X86_32
627 : detect_ht(c);
628 : #endif
629 : }
630 :
631 4 : init_intel_cacheinfo(c);
632 :
633 4 : if (c->cpuid_level > 9) {
634 4 : unsigned eax = cpuid_eax(10);
635 : /* Check for version and the number of counters */
636 4 : if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
637 4 : set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
638 : }
639 :
640 4 : if (cpu_has(c, X86_FEATURE_XMM2))
641 4 : set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
642 :
643 4 : if (boot_cpu_has(X86_FEATURE_DS)) {
644 0 : unsigned int l1, l2;
645 :
646 0 : rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
647 0 : if (!(l1 & (1<<11)))
648 0 : set_cpu_cap(c, X86_FEATURE_BTS);
649 0 : if (!(l1 & (1<<12)))
650 0 : set_cpu_cap(c, X86_FEATURE_PEBS);
651 : }
652 :
653 8 : if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
654 4 : (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
655 0 : set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
656 :
657 8 : if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
658 0 : ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
659 0 : set_cpu_bug(c, X86_BUG_MONITOR);
660 :
661 : #ifdef CONFIG_X86_64
662 4 : if (c->x86 == 15)
663 0 : c->x86_cache_alignment = c->x86_clflush_size * 2;
664 4 : if (c->x86 == 6)
665 4 : set_cpu_cap(c, X86_FEATURE_REP_GOOD);
666 : #else
667 : /*
668 : * Names for the Pentium II/Celeron processors
669 : * detectable only by also checking the cache size.
670 : * Dixon is NOT a Celeron.
671 : */
672 : if (c->x86 == 6) {
673 : unsigned int l2 = c->x86_cache_size;
674 : char *p = NULL;
675 :
676 : switch (c->x86_model) {
677 : case 5:
678 : if (l2 == 0)
679 : p = "Celeron (Covington)";
680 : else if (l2 == 256)
681 : p = "Mobile Pentium II (Dixon)";
682 : break;
683 :
684 : case 6:
685 : if (l2 == 128)
686 : p = "Celeron (Mendocino)";
687 : else if (c->x86_stepping == 0 || c->x86_stepping == 5)
688 : p = "Celeron-A";
689 : break;
690 :
691 : case 8:
692 : if (l2 == 128)
693 : p = "Celeron (Coppermine)";
694 : break;
695 : }
696 :
697 : if (p)
698 : strcpy(c->x86_model_id, p);
699 : }
700 :
701 : if (c->x86 == 15)
702 : set_cpu_cap(c, X86_FEATURE_P4);
703 : if (c->x86 == 6)
704 : set_cpu_cap(c, X86_FEATURE_P3);
705 : #endif
706 :
707 : /* Work around errata */
708 4 : srat_detect_node(c);
709 :
710 4 : init_ia32_feat_ctl(c);
711 :
712 4 : if (cpu_has(c, X86_FEATURE_TME))
713 0 : detect_tme(c);
714 :
715 4 : init_intel_misc_features(c);
716 :
717 4 : if (tsx_ctrl_state == TSX_CTRL_ENABLE)
718 0 : tsx_enable();
719 4 : if (tsx_ctrl_state == TSX_CTRL_DISABLE)
720 0 : tsx_disable();
721 :
722 4 : split_lock_init();
723 :
724 4 : intel_init_thermal(c);
725 4 : }
726 :
727 : #ifdef CONFIG_X86_32
728 : static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
729 : {
730 : /*
731 : * Intel PIII Tualatin. This comes in two flavours.
732 : * One has 256kb of cache, the other 512. We have no way
733 : * to determine which, so we use a boottime override
734 : * for the 512kb model, and assume 256 otherwise.
735 : */
736 : if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0))
737 : size = 256;
738 :
739 : /*
740 : * Intel Quark SoC X1000 contains a 4-way set associative
741 : * 16K cache with a 16 byte cache line and 256 lines per tag
742 : */
743 : if ((c->x86 == 5) && (c->x86_model == 9))
744 : size = 16;
745 : return size;
746 : }
747 : #endif
748 :
749 : #define TLB_INST_4K 0x01
750 : #define TLB_INST_4M 0x02
751 : #define TLB_INST_2M_4M 0x03
752 :
753 : #define TLB_INST_ALL 0x05
754 : #define TLB_INST_1G 0x06
755 :
756 : #define TLB_DATA_4K 0x11
757 : #define TLB_DATA_4M 0x12
758 : #define TLB_DATA_2M_4M 0x13
759 : #define TLB_DATA_4K_4M 0x14
760 :
761 : #define TLB_DATA_1G 0x16
762 :
763 : #define TLB_DATA0_4K 0x21
764 : #define TLB_DATA0_4M 0x22
765 : #define TLB_DATA0_2M_4M 0x23
766 :
767 : #define STLB_4K 0x41
768 : #define STLB_4K_2M 0x42
769 :
770 : static const struct _tlb_table intel_tlb_table[] = {
771 : { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
772 : { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
773 : { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
774 : { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
775 : { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
776 : { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
777 : { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
778 : { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
779 : { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
780 : { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
781 : { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
782 : { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
783 : { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
784 : { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
785 : { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
786 : { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
787 : { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
788 : { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
789 : { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
790 : { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" },
791 : { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" },
792 : { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
793 : { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" },
794 : { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
795 : { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
796 : { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
797 : { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
798 : { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
799 : { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
800 : { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" },
801 : { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" },
802 : { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
803 : { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
804 : { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
805 : { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
806 : { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
807 : { 0x00, 0, 0 }
808 : };
809 :
810 15 : static void intel_tlb_lookup(const unsigned char desc)
811 : {
812 15 : unsigned char k;
813 15 : if (desc == 0)
814 : return;
815 :
816 : /* look up this descriptor in the table */
817 148 : for (k = 0; intel_tlb_table[k].descriptor != desc &&
818 144 : intel_tlb_table[k].descriptor != 0; k++)
819 144 : ;
820 :
821 4 : if (intel_tlb_table[k].tlb_type == 0)
822 : return;
823 :
824 0 : switch (intel_tlb_table[k].tlb_type) {
825 0 : case STLB_4K:
826 0 : if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
827 0 : tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
828 0 : if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
829 0 : tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
830 : break;
831 0 : case STLB_4K_2M:
832 0 : if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
833 0 : tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
834 0 : if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
835 0 : tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
836 0 : if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
837 0 : tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
838 0 : if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
839 0 : tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
840 0 : if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
841 0 : tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
842 0 : if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
843 0 : tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
844 : break;
845 0 : case TLB_INST_ALL:
846 0 : if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
847 0 : tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
848 0 : if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
849 0 : tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
850 0 : if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
851 0 : tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
852 : break;
853 0 : case TLB_INST_4K:
854 0 : if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
855 0 : tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
856 : break;
857 0 : case TLB_INST_4M:
858 0 : if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
859 0 : tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
860 : break;
861 0 : case TLB_INST_2M_4M:
862 0 : if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
863 0 : tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
864 0 : if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
865 0 : tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
866 : break;
867 0 : case TLB_DATA_4K:
868 : case TLB_DATA0_4K:
869 0 : if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
870 0 : tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
871 : break;
872 0 : case TLB_DATA_4M:
873 : case TLB_DATA0_4M:
874 0 : if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
875 0 : tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
876 : break;
877 0 : case TLB_DATA_2M_4M:
878 : case TLB_DATA0_2M_4M:
879 0 : if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
880 0 : tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
881 0 : if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
882 0 : tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
883 : break;
884 0 : case TLB_DATA_4K_4M:
885 0 : if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
886 0 : tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
887 0 : if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
888 0 : tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
889 : break;
890 0 : case TLB_DATA_1G:
891 0 : if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
892 0 : tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
893 : break;
894 : }
895 15 : }
896 :
897 1 : static void intel_detect_tlb(struct cpuinfo_x86 *c)
898 : {
899 1 : int i, j, n;
900 1 : unsigned int regs[4];
901 1 : unsigned char *desc = (unsigned char *)regs;
902 :
903 1 : if (c->cpuid_level < 2)
904 0 : return;
905 :
906 : /* Number of times to iterate */
907 1 : n = cpuid_eax(2) & 0xFF;
908 :
909 2 : for (i = 0 ; i < n ; i++) {
910 1 : cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
911 :
912 : /* If bit 31 is set, this is an unknown format */
913 5 : for (j = 0 ; j < 3 ; j++)
914 3 : if (regs[j] & (1 << 31))
915 0 : regs[j] = 0;
916 :
917 : /* Byte 0 is level count, not a descriptor */
918 16 : for (j = 1 ; j < 16 ; j++)
919 15 : intel_tlb_lookup(desc[j]);
920 : }
921 : }
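/*
 * Editorial sketch (not part of the kernel source): a hypothetical CPUID
 * leaf 2 result walked by the loop above:
 *
 *   EAX = 0x76036301: AL = 0x01, so the leaf is read once; the remaining
 *   bytes 0x63, 0x03 and 0x76 are descriptors. EBX/ECX/EDX with bit 31
 *   clear contribute their bytes as well.
 *
 * Each non-zero byte is looked up in intel_tlb_table[]; 0x63, for example,
 * records a 4-entry, 4-way 1 GByte data TLB (TLB_DATA_1G).
 */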
922 :
923 : static const struct cpu_dev intel_cpu_dev = {
924 : .c_vendor = "Intel",
925 : .c_ident = { "GenuineIntel" },
926 : #ifdef CONFIG_X86_32
927 : .legacy_models = {
928 : { .family = 4, .model_names =
929 : {
930 : [0] = "486 DX-25/33",
931 : [1] = "486 DX-50",
932 : [2] = "486 SX",
933 : [3] = "486 DX/2",
934 : [4] = "486 SL",
935 : [5] = "486 SX/2",
936 : [7] = "486 DX/2-WB",
937 : [8] = "486 DX/4",
938 : [9] = "486 DX/4-WB"
939 : }
940 : },
941 : { .family = 5, .model_names =
942 : {
943 : [0] = "Pentium 60/66 A-step",
944 : [1] = "Pentium 60/66",
945 : [2] = "Pentium 75 - 200",
946 : [3] = "OverDrive PODP5V83",
947 : [4] = "Pentium MMX",
948 : [7] = "Mobile Pentium 75 - 200",
949 : [8] = "Mobile Pentium MMX",
950 : [9] = "Quark SoC X1000",
951 : }
952 : },
953 : { .family = 6, .model_names =
954 : {
955 : [0] = "Pentium Pro A-step",
956 : [1] = "Pentium Pro",
957 : [3] = "Pentium II (Klamath)",
958 : [4] = "Pentium II (Deschutes)",
959 : [5] = "Pentium II (Deschutes)",
960 : [6] = "Mobile Pentium II",
961 : [7] = "Pentium III (Katmai)",
962 : [8] = "Pentium III (Coppermine)",
963 : [10] = "Pentium III (Cascades)",
964 : [11] = "Pentium III (Tualatin)",
965 : }
966 : },
967 : { .family = 15, .model_names =
968 : {
969 : [0] = "Pentium 4 (Unknown)",
970 : [1] = "Pentium 4 (Willamette)",
971 : [2] = "Pentium 4 (Northwood)",
972 : [4] = "Pentium 4 (Foster)",
973 : [5] = "Pentium 4 (Foster)",
974 : }
975 : },
976 : },
977 : .legacy_cache_size = intel_size_cache,
978 : #endif
979 : .c_detect_tlb = intel_detect_tlb,
980 : .c_early_init = early_init_intel,
981 : .c_bsp_init = bsp_init_intel,
982 : .c_init = init_intel,
983 : .c_x86_vendor = X86_VENDOR_INTEL,
984 : };
985 :
986 : cpu_dev_register(intel_cpu_dev);
987 :
988 : #undef pr_fmt
989 : #define pr_fmt(fmt) "x86/split lock detection: " fmt
990 :
991 : static const struct {
992 : const char *option;
993 : enum split_lock_detect_state state;
994 : } sld_options[] __initconst = {
995 : { "off", sld_off },
996 : { "warn", sld_warn },
997 : { "fatal", sld_fatal },
998 : };
999 :
1000 0 : static inline bool match_option(const char *arg, int arglen, const char *opt)
1001 : {
1002 0 : int len = strlen(opt);
1003 :
1004 0 : return len == arglen && !strncmp(arg, opt, len);
1005 : }
1006 :
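/*
 * Editorial note: probe whether MSR_TEST_CTRL is usable by setting (or
 * clearing) the split-lock-detect bit and reading the value back; a failed
 * or non-sticking write means split lock detection must stay disabled.
 */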
1007 0 : static bool split_lock_verify_msr(bool on)
1008 : {
1009 0 : u64 ctrl, tmp;
1010 :
1011 0 : if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl))
1012 : return false;
1013 0 : if (on)
1014 0 : ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1015 : else
1016 0 : ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1017 0 : if (wrmsrl_safe(MSR_TEST_CTRL, ctrl))
1018 : return false;
1019 0 : rdmsrl(MSR_TEST_CTRL, tmp);
1020 0 : return ctrl == tmp;
1021 : }
1022 :
1023 0 : static void __init split_lock_setup(void)
1024 : {
1025 0 : enum split_lock_detect_state state = sld_warn;
1026 0 : char arg[20];
1027 0 : int i, ret;
1028 :
1029 0 : if (!split_lock_verify_msr(false)) {
1030 0 : pr_info("MSR access failed: Disabled\n");
1031 0 : return;
1032 : }
1033 :
1034 0 : ret = cmdline_find_option(boot_command_line, "split_lock_detect",
1035 : arg, sizeof(arg));
1036 0 : if (ret >= 0) {
1037 0 : for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
1038 0 : if (match_option(arg, ret, sld_options[i].option)) {
1039 0 : state = sld_options[i].state;
1040 0 : break;
1041 : }
1042 : }
1043 : }
1044 :
1045 0 : switch (state) {
1046 0 : case sld_off:
1047 0 : pr_info("disabled\n");
1048 0 : return;
1049 0 : case sld_warn:
1050 0 : pr_info("warning about user-space split_locks\n");
1051 0 : break;
1052 0 : case sld_fatal:
1053 0 : pr_info("sending SIGBUS on user-space split_locks\n");
1054 0 : break;
1055 : }
1056 :
1057 0 : rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
1058 :
1059 0 : if (!split_lock_verify_msr(true)) {
1060 0 : pr_info("MSR access failed: Disabled\n");
1061 0 : return;
1062 : }
1063 :
1064 0 : sld_state = state;
1065 0 : setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
1066 : }
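/*
 * Usage note (editorial): sld_options[] above corresponds to the
 * "split_lock_detect=" kernel command line parameter, e.g.:
 *
 *   split_lock_detect=off    detection disabled
 *   split_lock_detect=warn   warn (rate-limited) and let the task continue;
 *                            the default on CPUs that support the feature
 *   split_lock_detect=fatal  send SIGBUS to user tasks that trigger it
 */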
1067 :
1068 : /*
1069 : * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
1070 : * is not implemented as one thread could undo the setting of the other
1071 : * thread immediately after dropping the lock anyway.
1072 : */
1073 0 : static void sld_update_msr(bool on)
1074 : {
1075 0 : u64 test_ctrl_val = msr_test_ctrl_cache;
1076 :
1077 0 : if (on)
1078 0 : test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1079 :
1080 0 : wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
1081 0 : }
1082 :
1083 4 : static void split_lock_init(void)
1084 : {
1085 4 : if (cpu_model_supports_sld)
1086 0 : split_lock_verify_msr(sld_state != sld_off);
1087 4 : }
1088 :
1089 0 : static void split_lock_warn(unsigned long ip)
1090 : {
1091 0 : pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1092 : current->comm, current->pid, ip);
1093 :
1094 : /*
1095 : * Disable the split lock detection for this task so it can make
1096 : * progress and set TIF_SLD so the detection is re-enabled via
1097 : * switch_to_sld() when the task is scheduled out.
1098 : */
1099 0 : sld_update_msr(false);
1100 0 : set_tsk_thread_flag(current, TIF_SLD);
1101 0 : }
1102 :
1103 0 : bool handle_guest_split_lock(unsigned long ip)
1104 : {
1105 0 : if (sld_state == sld_warn) {
1106 0 : split_lock_warn(ip);
1107 0 : return true;
1108 : }
1109 :
1110 0 : pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
1111 : current->comm, current->pid,
1112 : sld_state == sld_fatal ? "fatal" : "bogus", ip);
1113 :
1114 0 : current->thread.error_code = 0;
1115 0 : current->thread.trap_nr = X86_TRAP_AC;
1116 0 : force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
1117 0 : return false;
1118 : }
1119 : EXPORT_SYMBOL_GPL(handle_guest_split_lock);
1120 :
1121 0 : bool handle_user_split_lock(struct pt_regs *regs, long error_code)
1122 : {
1123 0 : if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
1124 : return false;
1125 0 : split_lock_warn(regs->ip);
1126 0 : return true;
1127 : }
1128 :
1129 : /*
1130 : * This function is called only when switching between tasks with
1131 : * different split-lock detection modes. It sets the MSR for the
1132 : * mode of the new task. This is right most of the time, but since
1133 : * the MSR is shared by hyperthreads on a physical core there can
1134 : * be glitches when the two threads need different modes.
1135 : */
1136 0 : void switch_to_sld(unsigned long tifn)
1137 : {
1138 0 : sld_update_msr(!(tifn & _TIF_SLD));
1139 0 : }
1140 :
1141 : /*
1142 : * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
1143 : * only be trusted if it is confirmed that a CPU model implements a
1144 : * specific feature at a particular bit position.
1145 : *
1146 : * The possible driver data field values:
1147 : *
1148 : * - 0: CPU models that are known to have the per-core split-lock detection
1149 : * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
1150 : *
1151 : * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
1152 : * bit 5 to enumerate the per-core split-lock detection feature.
1153 : */
1154 : static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
1155 : X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
1156 : X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
1157 : X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
1158 : X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1),
1159 : X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1),
1160 : X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
1161 : X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1),
1162 : X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
1163 : X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
1164 : X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
1165 : X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
1166 : {}
1167 : };
1168 :
1169 1 : void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
1170 : {
1171 1 : const struct x86_cpu_id *m;
1172 1 : u64 ia32_core_caps;
1173 :
1174 1 : if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1175 : return;
1176 :
1177 0 : m = x86_match_cpu(split_lock_cpu_ids);
1178 0 : if (!m)
1179 : return;
1180 :
1181 0 : switch (m->driver_data) {
1182 : case 0:
1183 : break;
1184 0 : case 1:
1185 0 : if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
1186 : return;
1187 0 : rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
1188 0 : if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
1189 : return;
1190 : break;
1191 : default:
1192 : return;
1193 : }
1194 :
1195 0 : cpu_model_supports_sld = true;
1196 0 : split_lock_setup();
1197 : }
|