Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /* cpu_feature_enabled() cannot be used this early */
3 : #define USE_EARLY_PGTABLE_L5
4 :
5 : #include <linux/memblock.h>
6 : #include <linux/linkage.h>
7 : #include <linux/bitops.h>
8 : #include <linux/kernel.h>
9 : #include <linux/export.h>
10 : #include <linux/percpu.h>
11 : #include <linux/string.h>
12 : #include <linux/ctype.h>
13 : #include <linux/delay.h>
14 : #include <linux/sched/mm.h>
15 : #include <linux/sched/clock.h>
16 : #include <linux/sched/task.h>
17 : #include <linux/sched/smt.h>
18 : #include <linux/init.h>
19 : #include <linux/kprobes.h>
20 : #include <linux/kgdb.h>
21 : #include <linux/smp.h>
22 : #include <linux/io.h>
23 : #include <linux/syscore_ops.h>
24 : #include <linux/pgtable.h>
25 :
26 : #include <asm/cmdline.h>
27 : #include <asm/stackprotector.h>
28 : #include <asm/perf_event.h>
29 : #include <asm/mmu_context.h>
30 : #include <asm/doublefault.h>
31 : #include <asm/archrandom.h>
32 : #include <asm/hypervisor.h>
33 : #include <asm/processor.h>
34 : #include <asm/tlbflush.h>
35 : #include <asm/debugreg.h>
36 : #include <asm/sections.h>
37 : #include <asm/vsyscall.h>
38 : #include <linux/topology.h>
39 : #include <linux/cpumask.h>
40 : #include <linux/atomic.h>
41 : #include <asm/proto.h>
42 : #include <asm/setup.h>
43 : #include <asm/apic.h>
44 : #include <asm/desc.h>
45 : #include <asm/fpu/internal.h>
46 : #include <asm/mtrr.h>
47 : #include <asm/hwcap2.h>
48 : #include <linux/numa.h>
49 : #include <asm/numa.h>
50 : #include <asm/asm.h>
51 : #include <asm/bugs.h>
52 : #include <asm/cpu.h>
53 : #include <asm/mce.h>
54 : #include <asm/msr.h>
55 : #include <asm/memtype.h>
56 : #include <asm/microcode.h>
57 : #include <asm/microcode_intel.h>
58 : #include <asm/intel-family.h>
59 : #include <asm/cpu_device_id.h>
60 : #include <asm/uv/uv.h>
61 :
62 : #include "cpu.h"
63 :
64 : u32 elf_hwcap2 __read_mostly;
65 :
66 : /* all of these masks are initialized in setup_cpu_local_masks() */
67 : cpumask_var_t cpu_initialized_mask;
68 : cpumask_var_t cpu_callout_mask;
69 : cpumask_var_t cpu_callin_mask;
70 :
71 : /* representing cpus for which sibling maps can be computed */
72 : cpumask_var_t cpu_sibling_setup_mask;
73 :
74 : /* Number of siblings per CPU package */
75 : int smp_num_siblings = 1;
76 : EXPORT_SYMBOL(smp_num_siblings);
77 :
78 : /* Last level cache ID of each logical CPU */
79 : DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
80 :
81 : /* correctly size the local cpu masks */
82 1 : void __init setup_cpu_local_masks(void)
83 : {
84 1 : alloc_bootmem_cpumask_var(&cpu_initialized_mask);
85 1 : alloc_bootmem_cpumask_var(&cpu_callin_mask);
86 1 : alloc_bootmem_cpumask_var(&cpu_callout_mask);
87 1 : alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
88 1 : }
89 :
90 0 : static void default_init(struct cpuinfo_x86 *c)
91 : {
92 : #ifdef CONFIG_X86_64
93 0 : cpu_detect_cache_sizes(c);
94 : #else
95 : /* Not much we can do here... */
96 : /* Check if it at least has CPUID */
97 : if (c->cpuid_level == -1) {
98 : /* No cpuid. It must be an ancient CPU */
99 : if (c->x86 == 4)
100 : strcpy(c->x86_model_id, "486");
101 : else if (c->x86 == 3)
102 : strcpy(c->x86_model_id, "386");
103 : }
104 : #endif
105 0 : }
106 :
107 : static const struct cpu_dev default_cpu = {
108 : .c_init = default_init,
109 : .c_vendor = "Unknown",
110 : .c_x86_vendor = X86_VENDOR_UNKNOWN,
111 : };
112 :
113 : static const struct cpu_dev *this_cpu = &default_cpu;
114 :
115 : DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
116 : #ifdef CONFIG_X86_64
117 : /*
118 : * We need valid kernel segments for data and code in long mode too;
119 : * IRET will check the segment types (kkeil 2000/10/28).
120 : * Also, SYSRET mandates a special GDT layout.
121 : *
122 : * TLS descriptors are currently at a different place compared to i386.
123 : * Hopefully nobody expects them at a fixed place (Wine?)
124 : */
125 : [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
126 : [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
127 : [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
128 : [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
129 : [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
130 : [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
131 : #else
132 : [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
133 : [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
134 : [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
135 : [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
136 : /*
137 : * Segments used for calling PnP BIOS have byte granularity.
138 : * The code segments and data segments have fixed 64k limits,
139 : * the transfer segment sizes are set at run time.
140 : */
141 : /* 32-bit code */
142 : [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
143 : /* 16-bit code */
144 : [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
145 : /* 16-bit data */
146 : [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
147 : /* 16-bit data */
148 : [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
149 : /* 16-bit data */
150 : [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
151 : /*
152 : * The APM segments have byte granularity and their bases
153 : * are set at run time. All have 64k limits.
154 : */
155 : /* 32-bit code */
156 : [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
157 : /* 16-bit code */
158 : [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
159 : /* data */
160 : [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
161 :
162 : [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
163 : [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
164 : GDT_STACK_CANARY_INIT
165 : #endif
166 : } };
167 : EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
168 :
169 : #ifdef CONFIG_X86_64
170 0 : static int __init x86_nopcid_setup(char *s)
171 : {
172 : /* nopcid doesn't accept parameters */
173 0 : if (s)
174 : return -EINVAL;
175 :
176 : /* do not emit a message if the feature is not present */
177 0 : if (!boot_cpu_has(X86_FEATURE_PCID))
178 : return 0;
179 :
180 0 : setup_clear_cpu_cap(X86_FEATURE_PCID);
181 0 : pr_info("nopcid: PCID feature disabled\n");
182 0 : return 0;
183 : }
184 : early_param("nopcid", x86_nopcid_setup);
185 : #endif
186 :
187 0 : static int __init x86_noinvpcid_setup(char *s)
188 : {
189 : /* noinvpcid doesn't accept parameters */
190 0 : if (s)
191 : return -EINVAL;
192 :
193 : /* do not emit a message if the feature is not present */
194 0 : if (!boot_cpu_has(X86_FEATURE_INVPCID))
195 : return 0;
196 :
197 0 : setup_clear_cpu_cap(X86_FEATURE_INVPCID);
198 0 : pr_info("noinvpcid: INVPCID feature disabled\n");
199 0 : return 0;
200 : }
201 : early_param("noinvpcid", x86_noinvpcid_setup);
202 :
203 : #ifdef CONFIG_X86_32
204 : static int cachesize_override = -1;
205 : static int disable_x86_serial_nr = 1;
206 :
207 : static int __init cachesize_setup(char *str)
208 : {
209 : get_option(&str, &cachesize_override);
210 : return 1;
211 : }
212 : __setup("cachesize=", cachesize_setup);
213 :
214 : static int __init x86_sep_setup(char *s)
215 : {
216 : setup_clear_cpu_cap(X86_FEATURE_SEP);
217 : return 1;
218 : }
219 : __setup("nosep", x86_sep_setup);
220 :
221 : /* Standard macro to see if a specific flag is changeable */
222 : static inline int flag_is_changeable_p(u32 flag)
223 : {
224 : u32 f1, f2;
225 :
226 : /*
227 : * Cyrix and IDT cpus allow disabling of CPUID
228 : * so the code below may return different results
229 : * when it is executed before and after enabling
230 : * the CPUID. Add "volatile" to not allow gcc to
231 : * optimize the subsequent calls to this function.
232 : */
233 : asm volatile ("pushfl \n\t"
234 : "pushfl \n\t"
235 : "popl %0 \n\t"
236 : "movl %0, %1 \n\t"
237 : "xorl %2, %0 \n\t"
238 : "pushl %0 \n\t"
239 : "popfl \n\t"
240 : "pushfl \n\t"
241 : "popl %0 \n\t"
242 : "popfl \n\t"
243 :
244 : : "=&r" (f1), "=&r" (f2)
245 : : "ir" (flag));
246 :
247 : return ((f1^f2) & flag) != 0;
248 : }
249 :
250 : /* Probe for the CPUID instruction */
251 : int have_cpuid_p(void)
252 : {
253 : return flag_is_changeable_p(X86_EFLAGS_ID);
254 : }
255 :
256 : static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
257 : {
258 : unsigned long lo, hi;
259 :
260 : if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
261 : return;
262 :
263 : /* Disable processor serial number: */
264 :
265 : rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
266 : lo |= 0x200000;
267 : wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
268 :
269 : pr_notice("CPU serial number disabled.\n");
270 : clear_cpu_cap(c, X86_FEATURE_PN);
271 :
272 : /* Disabling the serial number may affect the cpuid level */
273 : c->cpuid_level = cpuid_eax(0);
274 : }
275 :
276 : static int __init x86_serial_nr_setup(char *s)
277 : {
278 : disable_x86_serial_nr = 0;
279 : return 1;
280 : }
281 : __setup("serialnumber", x86_serial_nr_setup);
282 : #else
283 : static inline int flag_is_changeable_p(u32 flag)
284 : {
285 : return 1;
286 : }
287 4 : static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
288 : {
289 4 : }
290 : #endif
291 :
292 0 : static __init int setup_disable_smep(char *arg)
293 : {
294 0 : setup_clear_cpu_cap(X86_FEATURE_SMEP);
295 0 : return 1;
296 : }
297 : __setup("nosmep", setup_disable_smep);
298 :
299 4 : static __always_inline void setup_smep(struct cpuinfo_x86 *c)
300 : {
301 4 : if (cpu_has(c, X86_FEATURE_SMEP))
302 4 : cr4_set_bits(X86_CR4_SMEP);
303 : }
304 :
305 0 : static __init int setup_disable_smap(char *arg)
306 : {
307 0 : setup_clear_cpu_cap(X86_FEATURE_SMAP);
308 0 : return 1;
309 : }
310 : __setup("nosmap", setup_disable_smap);
311 :
312 4 : static __always_inline void setup_smap(struct cpuinfo_x86 *c)
313 : {
314 8 : unsigned long eflags = native_save_fl();
315 :
316 : /* This should have been cleared long ago */
317 4 : BUG_ON(eflags & X86_EFLAGS_AC);
318 :
319 4 : if (cpu_has(c, X86_FEATURE_SMAP)) {
320 : #ifdef CONFIG_X86_SMAP
321 : cr4_set_bits(X86_CR4_SMAP);
322 : #else
323 0 : cr4_clear_bits(X86_CR4_SMAP);
324 : #endif
325 : }
326 : }
327 :
328 4 : static __always_inline void setup_umip(struct cpuinfo_x86 *c)
329 : {
330 : /* Check the boot processor, plus build option for UMIP. */
331 4 : if (!cpu_feature_enabled(X86_FEATURE_UMIP))
332 4 : goto out;
333 :
334 : /* Check the current processor's cpuid bits. */
335 : if (!cpu_has(c, X86_FEATURE_UMIP))
336 : goto out;
337 :
338 : cr4_set_bits(X86_CR4_UMIP);
339 :
340 : pr_info_once("x86/cpu: User Mode Instruction Prevention (UMIP) activated\n");
341 :
342 : return;
343 :
344 4 : out:
345 : /*
346 : * Make sure UMIP is disabled in case it was enabled in a
347 : * previous boot (e.g., via kexec).
348 : */
349 4 : cr4_clear_bits(X86_CR4_UMIP);
350 : }
351 :
352 : /* These bits should not change their value after CPU init is finished. */
353 : static const unsigned long cr4_pinned_mask =
354 : X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
355 : static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
356 : static unsigned long cr4_pinned_bits __ro_after_init;
357 :
358 5 : void native_write_cr0(unsigned long val)
359 : {
360 5 : unsigned long bits_missing = 0;
361 :
362 5 : set_register:
363 5 : asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
364 :
365 5 : if (static_branch_likely(&cr_pinning)) {
366 3 : if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
367 0 : bits_missing = X86_CR0_WP;
368 0 : val |= bits_missing;
369 0 : goto set_register;
370 : }
371 : /* Warn after we've set the missing bits. */
372 3 : WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
373 : }
374 5 : }
375 : EXPORT_SYMBOL(native_write_cr0);
376 :
377 15 : void native_write_cr4(unsigned long val)
378 : {
379 15 : unsigned long bits_changed = 0;
380 :
381 15 : set_register:
382 15 : asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
383 :
384 15 : if (static_branch_likely(&cr_pinning)) {
385 9 : if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
386 0 : bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
387 0 : val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
388 0 : goto set_register;
389 : }
390 : /* Warn after we've corrected the changed bits. */
391 9 : WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
392 : bits_changed);
393 : }
394 15 : }
395 : #if IS_MODULE(CONFIG_LKDTM)
396 : EXPORT_SYMBOL_GPL(native_write_cr4);
397 : #endif
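/*
 * The conditional export above lets a modular LKDTM build call
 * native_write_cr4() directly, presumably so its CR-pinning tests can
 * exercise the "pinned CR4 bits changed" warning path.
 */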
398 :
399 12895 : void cr4_update_irqsoff(unsigned long set, unsigned long clear)
400 : {
401 12895 : unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
402 :
403 25792 : lockdep_assert_irqs_disabled();
404 :
405 12897 : newval = (cr4 & ~clear) | set;
406 12897 : if (newval != cr4) {
407 12 : this_cpu_write(cpu_tlbstate.cr4, newval);
408 12 : __write_cr4(newval);
409 : }
410 12897 : }
411 : EXPORT_SYMBOL(cr4_update_irqsoff);
412 :
413 : /* Read the CR4 shadow. */
414 4 : unsigned long cr4_read_shadow(void)
415 : {
416 4 : return this_cpu_read(cpu_tlbstate.cr4);
417 : }
418 : EXPORT_SYMBOL_GPL(cr4_read_shadow);
419 :
420 3 : void cr4_init(void)
421 : {
422 3 : unsigned long cr4 = __read_cr4();
423 :
424 3 : if (boot_cpu_has(X86_FEATURE_PCID))
425 3 : cr4 |= X86_CR4_PCIDE;
426 3 : if (static_branch_likely(&cr_pinning))
427 3 : cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
428 :
429 3 : __write_cr4(cr4);
430 :
431 : /* Initialize cr4 shadow for this CPU. */
432 3 : this_cpu_write(cpu_tlbstate.cr4, cr4);
433 3 : }
434 :
435 : /*
436 : * Once CPU feature detection is finished (and boot params have been
437 : * parsed), record any of the sensitive CR bits that are set, and
438 : * enable CR pinning.
439 : */
440 1 : static void __init setup_cr_pinning(void)
441 : {
442 1 : cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
443 1 : static_key_enable(&cr_pinning.key);
444 1 : }
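/*
 * From here on, native_write_cr0()/native_write_cr4() above will put back
 * any pinned bit that goes missing and warn once about it, e.g. an attempt
 * to clear X86_CR4_SMEP via native_write_cr4() is undone and reported.
 */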
445 :
446 0 : static __init int x86_nofsgsbase_setup(char *arg)
447 : {
448 : /* Require an exact match without trailing characters. */
449 0 : if (strlen(arg))
450 : return 0;
451 :
452 : /* Do not emit a message if the feature is not present. */
453 0 : if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
454 : return 1;
455 :
456 0 : setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
457 0 : pr_info("FSGSBASE disabled via kernel command line\n");
458 0 : return 1;
459 : }
460 : __setup("nofsgsbase", x86_nofsgsbase_setup);
461 :
462 : /*
463 : * Protection Keys are not available in 32-bit mode.
464 : */
465 : static bool pku_disabled;
466 :
467 4 : static __always_inline void setup_pku(struct cpuinfo_x86 *c)
468 : {
469 4 : struct pkru_state *pk;
470 :
471 : /* check the boot processor, plus compile options for PKU: */
472 4 : if (!cpu_feature_enabled(X86_FEATURE_PKU))
473 4 : return;
474 : /* checks the actual processor's cpuid bits: */
475 : if (!cpu_has(c, X86_FEATURE_PKU))
476 : return;
477 : if (pku_disabled)
478 : return;
479 :
480 : cr4_set_bits(X86_CR4_PKE);
481 : pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
482 : if (pk)
483 : pk->pkru = init_pkru_value;
484 : /*
485 : * Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
486 : * cpuid bit to be set. We need to ensure that we
487 : * update that bit in this CPU's "cpu_info".
488 : */
489 : set_cpu_cap(c, X86_FEATURE_OSPKE);
490 : }
491 :
492 : #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
493 : static __init int setup_disable_pku(char *arg)
494 : {
495 : /*
496 : * Do not clear the X86_FEATURE_PKU bit. All of the
497 : * runtime checks are against OSPKE so clearing the
498 : * bit does nothing.
499 : *
500 : * This way, we will see "pku" in cpuinfo, but not
501 : * "ospke", which is exactly what we want. It shows
502 : * that the CPU has PKU, but the OS has not enabled it.
503 : * This happens to be exactly how a system would look
504 : * if we disabled the config option.
505 : */
506 : pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n");
507 : pku_disabled = true;
508 : return 1;
509 : }
510 : __setup("nopku", setup_disable_pku);
511 : #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
512 :
513 : /*
514 : * Some CPU features depend on higher CPUID levels, which may not always
515 : * be available due to CPUID level capping or broken virtualization
516 : * software. Add those features to this table to auto-disable them.
517 : */
518 : struct cpuid_dependent_feature {
519 : u32 feature;
520 : u32 level;
521 : };
522 :
523 : static const struct cpuid_dependent_feature
524 : cpuid_dependent_features[] = {
525 : { X86_FEATURE_MWAIT, 0x00000005 },
526 : { X86_FEATURE_DCA, 0x00000009 },
527 : { X86_FEATURE_XSAVE, 0x0000000d },
528 : { 0, 0 }
529 : };
530 :
531 5 : static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
532 : {
533 5 : const struct cpuid_dependent_feature *df;
534 :
535 20 : for (df = cpuid_dependent_features; df->feature; df++) {
536 :
537 15 : if (!cpu_has(c, df->feature))
538 10 : continue;
539 : /*
540 : * Note: cpuid_level is set to -1 if unavailable, but
541 : * extended_cpuid_level is set to 0 if unavailable
542 : * and the legitimate extended levels are all negative
543 : * when signed; hence the weird messing around with
544 : * signs here...
545 : */
546 10 : if (!((s32)df->level < 0 ?
547 0 : (u32)df->level > (u32)c->extended_cpuid_level :
548 5 : (s32)df->level > (s32)c->cpuid_level))
549 5 : continue;
550 :
551 0 : clear_cpu_cap(c, df->feature);
552 0 : if (!warn)
553 0 : continue;
554 :
555 0 : pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
556 : x86_cap_flag(df->feature), df->level);
557 : }
558 5 : }
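/*
 * Worked example of the signed-compare trick above: X86_FEATURE_XSAVE
 * requires leaf 0x0000000d. 0xd is positive as an s32, so it is checked
 * against the basic cpuid_level; on a CPU (or capped VM) reporting
 * cpuid_level = 0xa, 0xd > 0xa holds and XSAVE is cleared. A level that
 * is negative as an s32, such as 0x80000008, would instead be compared
 * unsigned against extended_cpuid_level.
 */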
559 :
560 : /*
561 : * Naming convention should be: <Name> [(<Codename>)]
562 : * This table is only used if init_<vendor>() below doesn't set the
563 : * model name; in particular, if CPUID levels 0x80000002..4 are
564 : * supported, it isn't used.
565 : */
566 :
567 : /* Look up CPU names by table lookup. */
568 0 : static const char *table_lookup_model(struct cpuinfo_x86 *c)
569 : {
570 : #ifdef CONFIG_X86_32
571 : const struct legacy_cpu_model_info *info;
572 :
573 : if (c->x86_model >= 16)
574 : return NULL; /* Range check */
575 :
576 : if (!this_cpu)
577 : return NULL;
578 :
579 : info = this_cpu->legacy_models;
580 :
581 : while (info->family) {
582 : if (info->family == c->x86)
583 : return info->model_names[c->x86_model];
584 : info++;
585 : }
586 : #endif
587 0 : return NULL; /* Not found */
588 : }
589 :
590 : /* Aligned to unsigned long to avoid split lock in atomic bitmap ops */
591 : __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
592 : __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
593 :
594 6 : void load_percpu_segment(int cpu)
595 : {
596 : #ifdef CONFIG_X86_32
597 : loadsegment(fs, __KERNEL_PERCPU);
598 : #else
599 6 : __loadsegment_simple(gs, 0);
600 6 : wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
601 : #endif
602 6 : load_stack_canary_segment();
603 6 : }
604 :
605 : #ifdef CONFIG_X86_32
606 : /* The 32-bit entry code needs to find cpu_entry_area. */
607 : DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
608 : #endif
609 :
610 : /* Load the original GDT from the per-cpu structure */
611 6 : void load_direct_gdt(int cpu)
612 : {
613 6 : struct desc_ptr gdt_descr;
614 :
615 0 : gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
616 6 : gdt_descr.size = GDT_SIZE - 1;
617 6 : load_gdt(&gdt_descr);
618 0 : }
619 : EXPORT_SYMBOL_GPL(load_direct_gdt);
620 :
621 : /* Load a fixmap remapping of the per-cpu GDT */
622 4 : void load_fixmap_gdt(int cpu)
623 : {
624 4 : struct desc_ptr gdt_descr;
625 :
626 4 : gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
627 4 : gdt_descr.size = GDT_SIZE - 1;
628 4 : load_gdt(&gdt_descr);
629 4 : }
630 : EXPORT_SYMBOL_GPL(load_fixmap_gdt);
631 :
632 : /*
633 : * Current gdt points %fs at the "master" per-cpu area: after this,
634 : * it's on the real one.
635 : */
636 6 : void switch_to_new_gdt(int cpu)
637 : {
638 : /* Load the original GDT */
639 6 : load_direct_gdt(cpu);
640 : /* Reload the per-cpu base */
641 6 : load_percpu_segment(cpu);
642 6 : }
643 :
644 : static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
645 :
646 4 : static void get_model_name(struct cpuinfo_x86 *c)
647 : {
648 4 : unsigned int *v;
649 4 : char *p, *q, *s;
650 :
651 4 : if (c->extended_cpuid_level < 0x80000004)
652 : return;
653 :
654 4 : v = (unsigned int *)c->x86_model_id;
655 4 : cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
656 4 : cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
657 4 : cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
658 4 : c->x86_model_id[48] = 0;
659 :
660 : /* Trim whitespace */
661 4 : p = q = s = &c->x86_model_id[0];
662 :
663 4 : while (*p == ' ')
664 0 : p++;
665 :
666 164 : while (*p) {
667 : /* Note the last non-whitespace index */
668 160 : if (!isspace(*p))
669 140 : s = q;
670 :
671 160 : *q++ = *p++;
672 : }
673 :
674 4 : *(s + 1) = '\0';
675 : }
676 :
677 0 : void detect_num_cpu_cores(struct cpuinfo_x86 *c)
678 : {
679 0 : unsigned int eax, ebx, ecx, edx;
680 :
681 0 : c->x86_max_cores = 1;
682 0 : if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
683 0 : return;
684 :
685 0 : cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
686 0 : if (eax & 0x1f)
687 0 : c->x86_max_cores = (eax >> 26) + 1;
688 : }
689 :
690 0 : void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
691 : {
692 0 : unsigned int n, dummy, ebx, ecx, edx, l2size;
693 :
694 0 : n = c->extended_cpuid_level;
695 :
696 0 : if (n >= 0x80000005) {
697 0 : cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
698 0 : c->x86_cache_size = (ecx>>24) + (edx>>24);
699 : #ifdef CONFIG_X86_64
700 : /* On K8 L1 TLB is inclusive, so don't count it */
701 0 : c->x86_tlbsize = 0;
702 : #endif
703 : }
704 :
705 0 : if (n < 0x80000006) /* Some chips just has a large L1. */
706 0 : return;
707 :
708 0 : cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
709 0 : l2size = ecx >> 16;
710 :
711 : #ifdef CONFIG_X86_64
712 0 : c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
713 : #else
714 : /* do processor-specific cache resizing */
715 : if (this_cpu->legacy_cache_size)
716 : l2size = this_cpu->legacy_cache_size(c, l2size);
717 :
718 : /* Allow user to override all this if necessary. */
719 : if (cachesize_override != -1)
720 : l2size = cachesize_override;
721 :
722 : if (l2size == 0)
723 : return; /* Again, no L2 cache is possible */
724 : #endif
725 :
726 0 : c->x86_cache_size = l2size;
727 : }
728 :
729 : u16 __read_mostly tlb_lli_4k[NR_INFO];
730 : u16 __read_mostly tlb_lli_2m[NR_INFO];
731 : u16 __read_mostly tlb_lli_4m[NR_INFO];
732 : u16 __read_mostly tlb_lld_4k[NR_INFO];
733 : u16 __read_mostly tlb_lld_2m[NR_INFO];
734 : u16 __read_mostly tlb_lld_4m[NR_INFO];
735 : u16 __read_mostly tlb_lld_1g[NR_INFO];
736 :
737 1 : static void cpu_detect_tlb(struct cpuinfo_x86 *c)
738 : {
739 1 : if (this_cpu->c_detect_tlb)
740 1 : this_cpu->c_detect_tlb(c);
741 :
742 1 : pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
743 : tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
744 : tlb_lli_4m[ENTRIES]);
745 :
746 1 : pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
747 : tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES],
748 : tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
749 1 : }
750 :
751 4 : int detect_ht_early(struct cpuinfo_x86 *c)
752 : {
753 : #ifdef CONFIG_SMP
754 4 : u32 eax, ebx, ecx, edx;
755 :
756 4 : if (!cpu_has(c, X86_FEATURE_HT))
757 : return -1;
758 :
759 0 : if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
760 : return -1;
761 :
762 0 : if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
763 : return -1;
764 :
765 0 : cpuid(1, &eax, &ebx, &ecx, &edx);
766 :
767 0 : smp_num_siblings = (ebx & 0xff0000) >> 16;
768 0 : if (smp_num_siblings == 1)
769 0 : pr_info_once("CPU0: Hyper-Threading is disabled\n");
770 : #endif
771 : return 0;
772 : }
773 :
774 4 : void detect_ht(struct cpuinfo_x86 *c)
775 : {
776 : #ifdef CONFIG_SMP
777 4 : int index_msb, core_bits;
778 :
779 4 : if (detect_ht_early(c) < 0)
780 : return;
781 :
782 0 : index_msb = get_count_order(smp_num_siblings);
783 0 : c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
784 :
785 0 : smp_num_siblings = smp_num_siblings / c->x86_max_cores;
786 :
787 0 : index_msb = get_count_order(smp_num_siblings);
788 :
789 0 : core_bits = get_count_order(c->x86_max_cores);
790 :
791 0 : c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
792 0 : ((1 << core_bits) - 1);
793 : #endif
794 : }
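/*
 * Example of the topology math above, assuming apic->phys_pkg_id() shifts
 * the APIC ID right by the given number of bits: with CPUID.1 reporting
 * 8 siblings and x86_max_cores = 4, index_msb is first 3 (package ID =
 * apicid >> 3); siblings-per-core then becomes 2, index_msb becomes 1,
 * core_bits is 2, so cpu_core_id = (apicid >> 1) & 3.
 */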
795 :
796 5 : static void get_cpu_vendor(struct cpuinfo_x86 *c)
797 : {
798 5 : char *v = c->x86_vendor_id;
799 5 : int i;
800 :
801 5 : for (i = 0; i < X86_VENDOR_NUM; i++) {
802 5 : if (!cpu_devs[i])
803 : break;
804 :
805 5 : if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
806 0 : (cpu_devs[i]->c_ident[1] &&
807 0 : !strcmp(v, cpu_devs[i]->c_ident[1]))) {
808 :
809 5 : this_cpu = cpu_devs[i];
810 5 : c->x86_vendor = this_cpu->c_x86_vendor;
811 5 : return;
812 : }
813 : }
814 :
815 0 : pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
816 : "CPU: Your system may be unstable.\n", v);
817 :
818 0 : c->x86_vendor = X86_VENDOR_UNKNOWN;
819 0 : this_cpu = &default_cpu;
820 : }
821 :
822 5 : void cpu_detect(struct cpuinfo_x86 *c)
823 : {
824 : /* Get vendor name */
825 10 : cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
826 5 : (unsigned int *)&c->x86_vendor_id[0],
827 5 : (unsigned int *)&c->x86_vendor_id[8],
828 5 : (unsigned int *)&c->x86_vendor_id[4]);
829 :
830 5 : c->x86 = 4;
831 : /* Intel-defined flags: level 0x00000001 */
832 5 : if (c->cpuid_level >= 0x00000001) {
833 5 : u32 junk, tfms, cap0, misc;
834 :
835 5 : cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
836 5 : c->x86 = x86_family(tfms);
837 5 : c->x86_model = x86_model(tfms);
838 5 : c->x86_stepping = x86_stepping(tfms);
839 :
840 5 : if (cap0 & (1<<19)) {
841 5 : c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
842 5 : c->x86_cache_alignment = c->x86_clflush_size;
843 : }
844 : }
845 5 : }
846 :
847 13 : static void apply_forced_caps(struct cpuinfo_x86 *c)
848 : {
849 13 : int i;
850 :
851 286 : for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
852 273 : c->x86_capability[i] &= ~cpu_caps_cleared[i];
853 273 : c->x86_capability[i] |= cpu_caps_set[i];
854 : }
855 13 : }
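/*
 * cpu_caps_cleared[] and cpu_caps_set[] are filled by setup_clear_cpu_cap()
 * and setup_force_cpu_cap(), so the loop above re-applies every command-line
 * or quirk override each time a CPU is (re-)probed.
 */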
856 :
857 5 : static void init_speculation_control(struct cpuinfo_x86 *c)
858 : {
859 : /*
860 : * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
861 : * and they also have a different bit for STIBP support. Also,
862 : * a hypervisor might have set the individual AMD bits even on
863 : * Intel CPUs, for finer-grained selection of what's available.
864 : */
865 5 : if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
866 5 : set_cpu_cap(c, X86_FEATURE_IBRS);
867 5 : set_cpu_cap(c, X86_FEATURE_IBPB);
868 5 : set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
869 : }
870 :
871 5 : if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
872 5 : set_cpu_cap(c, X86_FEATURE_STIBP);
873 :
874 5 : if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
875 0 : cpu_has(c, X86_FEATURE_VIRT_SSBD))
876 5 : set_cpu_cap(c, X86_FEATURE_SSBD);
877 :
878 5 : if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
879 0 : set_cpu_cap(c, X86_FEATURE_IBRS);
880 0 : set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
881 : }
882 :
883 5 : if (cpu_has(c, X86_FEATURE_AMD_IBPB))
884 5 : set_cpu_cap(c, X86_FEATURE_IBPB);
885 :
886 5 : if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
887 5 : set_cpu_cap(c, X86_FEATURE_STIBP);
888 5 : set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
889 : }
890 :
891 5 : if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
892 5 : set_cpu_cap(c, X86_FEATURE_SSBD);
893 5 : set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
894 5 : clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
895 : }
896 5 : }
897 :
898 5 : void get_cpu_cap(struct cpuinfo_x86 *c)
899 : {
900 5 : u32 eax, ebx, ecx, edx;
901 :
902 : /* Intel-defined flags: level 0x00000001 */
903 5 : if (c->cpuid_level >= 0x00000001) {
904 5 : cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
905 :
906 5 : c->x86_capability[CPUID_1_ECX] = ecx;
907 5 : c->x86_capability[CPUID_1_EDX] = edx;
908 : }
909 :
910 : /* Thermal and Power Management Leaf: level 0x00000006 (eax) */
911 5 : if (c->cpuid_level >= 0x00000006)
912 5 : c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
913 :
914 : /* Additional Intel-defined flags: level 0x00000007 */
915 5 : if (c->cpuid_level >= 0x00000007) {
916 5 : cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
917 5 : c->x86_capability[CPUID_7_0_EBX] = ebx;
918 5 : c->x86_capability[CPUID_7_ECX] = ecx;
919 5 : c->x86_capability[CPUID_7_EDX] = edx;
920 :
921 : /* Check valid sub-leaf index before accessing it */
922 5 : if (eax >= 1) {
923 0 : cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
924 0 : c->x86_capability[CPUID_7_1_EAX] = eax;
925 : }
926 : }
927 :
928 : /* Extended state features: level 0x0000000d */
929 5 : if (c->cpuid_level >= 0x0000000d) {
930 5 : cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
931 :
932 5 : c->x86_capability[CPUID_D_1_EAX] = eax;
933 : }
934 :
935 : /* AMD-defined flags: level 0x80000001 */
936 5 : eax = cpuid_eax(0x80000000);
937 5 : c->extended_cpuid_level = eax;
938 :
939 5 : if ((eax & 0xffff0000) == 0x80000000) {
940 5 : if (eax >= 0x80000001) {
941 5 : cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
942 :
943 5 : c->x86_capability[CPUID_8000_0001_ECX] = ecx;
944 5 : c->x86_capability[CPUID_8000_0001_EDX] = edx;
945 : }
946 : }
947 :
948 5 : if (c->extended_cpuid_level >= 0x80000007) {
949 5 : cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
950 :
951 5 : c->x86_capability[CPUID_8000_0007_EBX] = ebx;
952 5 : c->x86_power = edx;
953 : }
954 :
955 5 : if (c->extended_cpuid_level >= 0x80000008) {
956 5 : cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
957 5 : c->x86_capability[CPUID_8000_0008_EBX] = ebx;
958 : }
959 :
960 5 : if (c->extended_cpuid_level >= 0x8000000a)
961 0 : c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
962 :
963 5 : if (c->extended_cpuid_level >= 0x8000001f)
964 0 : c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f);
965 :
966 5 : init_scattered_cpuid_features(c);
967 5 : init_speculation_control(c);
968 :
969 : /*
970 : * Clear/Set all flags overridden by options, after probe.
971 : * This needs to happen each time we re-probe, which may happen
972 : * several times during CPU initialization.
973 : */
974 5 : apply_forced_caps(c);
975 5 : }
976 :
977 5 : void get_cpu_address_sizes(struct cpuinfo_x86 *c)
978 : {
979 5 : u32 eax, ebx, ecx, edx;
980 :
981 0 : if (c->extended_cpuid_level >= 0x80000008) {
982 5 : cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
983 :
984 5 : c->x86_virt_bits = (eax >> 8) & 0xff;
985 5 : c->x86_phys_bits = eax & 0xff;
986 : }
987 : #ifdef CONFIG_X86_32
988 : else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
989 : c->x86_phys_bits = 36;
990 : #endif
991 5 : c->x86_cache_bits = c->x86_phys_bits;
992 0 : }
993 :
994 : static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
995 : {
996 : #ifdef CONFIG_X86_32
997 : int i;
998 :
999 : /*
1000 : * First of all, decide if this is a 486 or higher
1001 : * It's a 486 if we can modify the AC flag
1002 : */
1003 : if (flag_is_changeable_p(X86_EFLAGS_AC))
1004 : c->x86 = 4;
1005 : else
1006 : c->x86 = 3;
1007 :
1008 : for (i = 0; i < X86_VENDOR_NUM; i++)
1009 : if (cpu_devs[i] && cpu_devs[i]->c_identify) {
1010 : c->x86_vendor_id[0] = 0;
1011 : cpu_devs[i]->c_identify(c);
1012 : if (c->x86_vendor_id[0]) {
1013 : get_cpu_vendor(c);
1014 : break;
1015 : }
1016 : }
1017 : #endif
1018 : }
1019 :
1020 : #define NO_SPECULATION BIT(0)
1021 : #define NO_MELTDOWN BIT(1)
1022 : #define NO_SSB BIT(2)
1023 : #define NO_L1TF BIT(3)
1024 : #define NO_MDS BIT(4)
1025 : #define MSBDS_ONLY BIT(5)
1026 : #define NO_SWAPGS BIT(6)
1027 : #define NO_ITLB_MULTIHIT BIT(7)
1028 : #define NO_SPECTRE_V2 BIT(8)
1029 :
1030 : #define VULNWL(vendor, family, model, whitelist) \
1031 : X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
1032 :
1033 : #define VULNWL_INTEL(model, whitelist) \
1034 : VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
1035 :
1036 : #define VULNWL_AMD(family, whitelist) \
1037 : VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
1038 :
1039 : #define VULNWL_HYGON(family, whitelist) \
1040 : VULNWL(HYGON, family, X86_MODEL_ANY, whitelist)
1041 :
1042 : static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
1043 : VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
1044 : VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION),
1045 : VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION),
1046 : VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
1047 :
1048 : /* Intel Family 6 */
1049 : VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
1050 : VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
1051 : VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
1052 : VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
1053 : VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
1054 :
1055 : VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1056 : VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1057 : VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1058 : VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1059 : VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1060 : VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1061 :
1062 : VULNWL_INTEL(CORE_YONAH, NO_SSB),
1063 :
1064 : VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
1065 : VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
1066 :
1067 : VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
1068 : VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
1069 : VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
1070 :
1071 : /*
1072 : * Technically, swapgs isn't serializing on AMD (despite it previously
1073 : * being documented as such in the APM). But according to AMD, %gs is
1074 : * updated non-speculatively, and the issuing of %gs-relative memory
1075 : * operands will be blocked until the %gs update completes, which is
1076 : * good enough for our purposes.
1077 : */
1078 :
1079 : VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
1080 :
1081 : /* AMD Family 0xf - 0x12 */
1082 : VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1083 : VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1084 : VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1085 : VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1086 :
1087 : /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
1088 : VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1089 : VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
1090 :
1091 : /* Zhaoxin Family 7 */
1092 : VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
1093 : VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
1094 : {}
1095 : };
1096 :
1097 : #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
1098 : X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
1099 : INTEL_FAM6_##model, steppings, \
1100 : X86_FEATURE_ANY, issues)
1101 :
1102 : #define SRBDS BIT(0)
1103 :
1104 : static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
1105 : VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
1106 : VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
1107 : VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
1108 : VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
1109 : VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
1110 : VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
1111 : VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
1112 : VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
1113 : VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
1114 : VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
1115 : {}
1116 : };
1117 :
1118 10 : static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which)
1119 : {
1120 10 : const struct x86_cpu_id *m = x86_match_cpu(table);
1121 :
1122 10 : return m && !!(m->driver_data & which);
1123 : }
1124 :
1125 3 : u64 x86_read_arch_cap_msr(void)
1126 : {
1127 3 : u64 ia32_cap = 0;
1128 :
1129 3 : if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1130 3 : rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
1131 :
1132 3 : return ia32_cap;
1133 : }
1134 :
1135 1 : static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
1136 : {
1137 1 : u64 ia32_cap = x86_read_arch_cap_msr();
1138 :
1139 : /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
1140 1 : if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
1141 1 : !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
1142 0 : setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
1143 :
1144 1 : if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
1145 : return;
1146 :
1147 1 : setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1148 :
1149 1 : if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2))
1150 1 : setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1151 :
1152 1 : if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
1153 1 : !(ia32_cap & ARCH_CAP_SSB_NO) &&
1154 1 : !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
1155 1 : setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
1156 :
1157 1 : if (ia32_cap & ARCH_CAP_IBRS_ALL)
1158 0 : setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
1159 :
1160 1 : if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
1161 1 : !(ia32_cap & ARCH_CAP_MDS_NO)) {
1162 1 : setup_force_cpu_bug(X86_BUG_MDS);
1163 1 : if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
1164 0 : setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
1165 : }
1166 :
1167 1 : if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS))
1168 1 : setup_force_cpu_bug(X86_BUG_SWAPGS);
1169 :
1170 : /*
1171 : * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
1172 : * - TSX is supported or
1173 : * - TSX_CTRL is present
1174 : *
1175 : * TSX_CTRL check is needed for cases when TSX could be disabled before
1176 : * the kernel boot e.g. kexec.
1177 : * A TSX_CTRL check alone is not sufficient when the microcode
1178 : * update is not present, or when running as a guest that doesn't get TSX_CTRL.
1179 : */
1180 1 : if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
1181 1 : (cpu_has(c, X86_FEATURE_RTM) ||
1182 1 : (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
1183 0 : setup_force_cpu_bug(X86_BUG_TAA);
1184 :
1185 : /*
1186 : * SRBDS affects CPUs which support RDRAND or RDSEED and are listed
1187 : * in the vulnerability blacklist.
1188 : */
1189 1 : if ((cpu_has(c, X86_FEATURE_RDRAND) ||
1190 1 : cpu_has(c, X86_FEATURE_RDSEED)) &&
1191 1 : cpu_matches(cpu_vuln_blacklist, SRBDS))
1192 1 : setup_force_cpu_bug(X86_BUG_SRBDS);
1193 :
1194 1 : if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
1195 : return;
1196 :
1197 : /* Rogue Data Cache Load? No! */
1198 1 : if (ia32_cap & ARCH_CAP_RDCL_NO)
1199 : return;
1200 :
1201 1 : setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1202 :
1203 1 : if (cpu_matches(cpu_vuln_whitelist, NO_L1TF))
1204 : return;
1205 :
1206 1 : setup_force_cpu_bug(X86_BUG_L1TF);
1207 : }
1208 :
1209 : /*
1210 : * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
1211 : * unfortunately, that's not true in practice because of early VIA
1212 : * chips and (more importantly) broken virtualizers that are not easy
1213 : * to detect. In the latter case it doesn't even *fail* reliably, so
1214 : * probing for it doesn't even work. Disable it completely on 32-bit
1215 : * unless we can find a reliable way to detect all the broken cases.
1216 : * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
1217 : */
1218 1 : static void detect_nopl(void)
1219 : {
1220 : #ifdef CONFIG_X86_32
1221 : setup_clear_cpu_cap(X86_FEATURE_NOPL);
1222 : #else
1223 1 : setup_force_cpu_cap(X86_FEATURE_NOPL);
1224 : #endif
1225 1 : }
1226 :
1227 : /*
1228 : * We parse cpu parameters early because fpu__init_system() is executed
1229 : * before parse_early_param().
1230 : */
1231 1 : static void __init cpu_parse_early_param(void)
1232 : {
1233 1 : char arg[128];
1234 1 : char *argptr = arg;
1235 1 : int arglen, res, bit;
1236 :
1237 : #ifdef CONFIG_X86_32
1238 : if (cmdline_find_option_bool(boot_command_line, "no387"))
1239 : #ifdef CONFIG_MATH_EMULATION
1240 : setup_clear_cpu_cap(X86_FEATURE_FPU);
1241 : #else
1242 : pr_err("Option 'no387' requires CONFIG_MATH_EMULATION to be enabled.\n");
1243 : #endif
1244 :
1245 : if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
1246 : setup_clear_cpu_cap(X86_FEATURE_FXSR);
1247 : #endif
1248 :
1249 1 : if (cmdline_find_option_bool(boot_command_line, "noxsave"))
1250 0 : setup_clear_cpu_cap(X86_FEATURE_XSAVE);
1251 :
1252 1 : if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
1253 0 : setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
1254 :
1255 1 : if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
1256 0 : setup_clear_cpu_cap(X86_FEATURE_XSAVES);
1257 :
1258 1 : arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
1259 1 : if (arglen <= 0)
1260 1 : return;
1261 :
1262 0 : pr_info("Clearing CPUID bits:");
1263 0 : do {
1264 0 : res = get_option(&argptr, &bit);
1265 0 : if (res == 0 || res == 3)
1266 : break;
1267 :
1268 : /* If the argument was too long, the last bit may be cut off */
1269 0 : if (res == 1 && arglen >= sizeof(arg))
1270 : break;
1271 :
1272 0 : if (bit >= 0 && bit < NCAPINTS * 32) {
1273 0 : pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
1274 0 : setup_clear_cpu_cap(bit);
1275 : }
1276 0 : } while (res == 2);
1277 0 : pr_cont("\n");
1278 : }
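/*
 * For example, booting with "clearcpuid=154,155" clears feature bits 154
 * and 155 before the rest of early setup runs; get_option() returns 2
 * while more comma-separated values follow and 1 for the last one.
 */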
1279 :
1280 : /*
1281 : * Do minimum CPU detection early.
1282 : * Fields really needed: vendor, cpuid_level, family, model, mask,
1283 : * cache alignment.
1284 : * The others are not touched to avoid unwanted side effects.
1285 : *
1286 : * WARNING: this function is only called on the boot CPU. Don't add code
1287 : * here that is supposed to run on all CPUs.
1288 : */
1289 1 : static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1290 : {
1291 : #ifdef CONFIG_X86_64
1292 1 : c->x86_clflush_size = 64;
1293 1 : c->x86_phys_bits = 36;
1294 1 : c->x86_virt_bits = 48;
1295 : #else
1296 : c->x86_clflush_size = 32;
1297 : c->x86_phys_bits = 32;
1298 : c->x86_virt_bits = 32;
1299 : #endif
1300 1 : c->x86_cache_alignment = c->x86_clflush_size;
1301 :
1302 1 : memset(&c->x86_capability, 0, sizeof(c->x86_capability));
1303 1 : c->extended_cpuid_level = 0;
1304 :
1305 1 : if (!have_cpuid_p())
1306 1 : identify_cpu_without_cpuid(c);
1307 :
1308 : /* Cyrix could have CPUID enabled via c_identify() */
1309 1 : if (have_cpuid_p()) {
1310 1 : cpu_detect(c);
1311 1 : get_cpu_vendor(c);
1312 1 : get_cpu_cap(c);
1313 1 : get_cpu_address_sizes(c);
1314 1 : setup_force_cpu_cap(X86_FEATURE_CPUID);
1315 1 : cpu_parse_early_param();
1316 :
1317 1 : if (this_cpu->c_early_init)
1318 1 : this_cpu->c_early_init(c);
1319 :
1320 1 : c->cpu_index = 0;
1321 1 : filter_cpuid_features(c, false);
1322 :
1323 1 : if (this_cpu->c_bsp_init)
1324 1 : this_cpu->c_bsp_init(c);
1325 : } else {
1326 : setup_clear_cpu_cap(X86_FEATURE_CPUID);
1327 : }
1328 :
1329 1 : setup_force_cpu_cap(X86_FEATURE_ALWAYS);
1330 :
1331 1 : cpu_set_bug_bits(c);
1332 :
1333 1 : cpu_set_core_cap_bits(c);
1334 :
1335 1 : fpu__init_system(c);
1336 :
1337 : #ifdef CONFIG_X86_32
1338 : /*
1339 : * Regardless of whether PCID is enumerated, the SDM says
1340 : * that it can't be enabled in 32-bit mode.
1341 : */
1342 : setup_clear_cpu_cap(X86_FEATURE_PCID);
1343 : #endif
1344 :
1345 : /*
1346 : * Later in the boot process pgtable_l5_enabled() relies on
1347 : * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
1348 : * enabled by this point we need to clear the feature bit to avoid
1349 : * false-positives at the later stage.
1350 : *
1351 : * pgtable_l5_enabled() can be false here for several reasons:
1352 : * - 5-level paging is disabled compile-time;
1353 : * - it's 32-bit kernel;
1354 : * - machine doesn't support 5-level paging;
1355 : * - user specified 'no5lvl' in kernel command line.
1356 : */
1357 1 : if (!pgtable_l5_enabled())
1358 1 : setup_clear_cpu_cap(X86_FEATURE_LA57);
1359 :
1360 1 : detect_nopl();
1361 1 : }
1362 :
1363 1 : void __init early_cpu_init(void)
1364 : {
1365 1 : const struct cpu_dev *const *cdev;
1366 1 : int count = 0;
1367 :
1368 : #ifdef CONFIG_PROCESSOR_SELECT
1369 : pr_info("KERNEL supported cpus:\n");
1370 : #endif
1371 :
1372 6 : for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
1373 5 : const struct cpu_dev *cpudev = *cdev;
1374 :
1375 5 : if (count >= X86_VENDOR_NUM)
1376 : break;
1377 5 : cpu_devs[count] = cpudev;
1378 5 : count++;
1379 :
1380 : #ifdef CONFIG_PROCESSOR_SELECT
1381 : {
1382 : unsigned int j;
1383 :
1384 : for (j = 0; j < 2; j++) {
1385 : if (!cpudev->c_ident[j])
1386 : continue;
1387 : pr_info(" %s %s\n", cpudev->c_vendor,
1388 : cpudev->c_ident[j]);
1389 : }
1390 : }
1391 : #endif
1392 : }
1393 1 : early_identify_cpu(&boot_cpu_data);
1394 1 : }
1395 :
1396 4 : static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
1397 : {
1398 : #ifdef CONFIG_X86_64
1399 : /*
1400 : * Empirically, writing zero to a segment selector on AMD does
1401 : * not clear the base, whereas writing zero to a segment
1402 : * selector on Intel does clear the base. Intel's behavior
1403 : * allows slightly faster context switches in the common case
1404 : * where GS is unused by the prev and next threads.
1405 : *
1406 : * Since neither vendor documents this anywhere that I can see,
1407 : * detect it directly instead of hardcoding the choice by
1408 : * vendor.
1409 : *
1410 : * I've designated AMD's behavior as the "bug" because it's
1411 : * counterintuitive and less friendly.
1412 : */
1413 :
1414 4 : unsigned long old_base, tmp;
1415 4 : rdmsrl(MSR_FS_BASE, old_base);
1416 4 : wrmsrl(MSR_FS_BASE, 1);
1417 4 : loadsegment(fs, 0);
1418 4 : rdmsrl(MSR_FS_BASE, tmp);
1419 4 : if (tmp != 0)
1420 0 : set_cpu_bug(c, X86_BUG_NULL_SEG);
1421 4 : wrmsrl(MSR_FS_BASE, old_base);
1422 : #endif
1423 4 : }
1424 :
1425 4 : static void generic_identify(struct cpuinfo_x86 *c)
1426 : {
1427 4 : c->extended_cpuid_level = 0;
1428 :
1429 4 : if (!have_cpuid_p())
1430 4 : identify_cpu_without_cpuid(c);
1431 :
1432 : /* Cyrix could have CPUID enabled via c_identify() */
1433 4 : if (!have_cpuid_p())
1434 : return;
1435 :
1436 4 : cpu_detect(c);
1437 :
1438 4 : get_cpu_vendor(c);
1439 :
1440 4 : get_cpu_cap(c);
1441 :
1442 4 : get_cpu_address_sizes(c);
1443 :
1444 4 : if (c->cpuid_level >= 0x00000001) {
1445 4 : c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
1446 : #ifdef CONFIG_X86_32
1447 : # ifdef CONFIG_SMP
1448 : c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
1449 : # else
1450 : c->apicid = c->initial_apicid;
1451 : # endif
1452 : #endif
1453 4 : c->phys_proc_id = c->initial_apicid;
1454 : }
1455 :
1456 4 : get_model_name(c); /* Default name */
1457 :
1458 4 : detect_null_seg_behavior(c);
1459 :
1460 : /*
1461 : * ESPFIX is a strange bug. All real CPUs have it. Paravirt
1462 : * systems that run Linux at CPL > 0 may or may not have the
1463 : * issue, but, even if they have the issue, there's absolutely
1464 : * nothing we can do about it because we can't use the real IRET
1465 : * instruction.
1466 : *
1467 : * NB: For the time being, only 32-bit kernels support
1468 : * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
1469 : * whether to apply espfix using paravirt hooks. If any
1470 : * non-paravirt system ever shows up that does *not* have the
1471 : * ESPFIX issue, we can change this.
1472 : */
1473 : #ifdef CONFIG_X86_32
1474 : set_cpu_bug(c, X86_BUG_ESPFIX);
1475 : #endif
1476 : }
1477 :
1478 : /*
1479 : * Validate that ACPI/mptables have the same information about the
1480 : * effective APIC id and update the package map.
1481 : */
1482 3 : static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
1483 : {
1484 : #ifdef CONFIG_SMP
1485 3 : unsigned int apicid, cpu = smp_processor_id();
1486 :
1487 3 : apicid = apic->cpu_present_to_apicid(cpu);
1488 :
1489 3 : if (apicid != c->apicid) {
1490 0 : pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n",
1491 : cpu, apicid, c->initial_apicid);
1492 : }
1493 3 : BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
1494 3 : BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
1495 : #else
1496 : c->logical_proc_id = 0;
1497 : #endif
1498 3 : }
1499 :
1500 : /*
1501 : * This does the hard work of actually picking apart the CPU stuff...
1502 : */
1503 4 : static void identify_cpu(struct cpuinfo_x86 *c)
1504 : {
1505 4 : int i;
1506 :
1507 4 : c->loops_per_jiffy = loops_per_jiffy;
1508 4 : c->x86_cache_size = 0;
1509 4 : c->x86_vendor = X86_VENDOR_UNKNOWN;
1510 4 : c->x86_model = c->x86_stepping = 0; /* So far unknown... */
1511 4 : c->x86_vendor_id[0] = '\0'; /* Unset */
1512 4 : c->x86_model_id[0] = '\0'; /* Unset */
1513 4 : c->x86_max_cores = 1;
1514 4 : c->x86_coreid_bits = 0;
1515 4 : c->cu_id = 0xff;
1516 : #ifdef CONFIG_X86_64
1517 4 : c->x86_clflush_size = 64;
1518 4 : c->x86_phys_bits = 36;
1519 4 : c->x86_virt_bits = 48;
1520 : #else
1521 : c->cpuid_level = -1; /* CPUID not detected */
1522 : c->x86_clflush_size = 32;
1523 : c->x86_phys_bits = 32;
1524 : c->x86_virt_bits = 32;
1525 : #endif
1526 4 : c->x86_cache_alignment = c->x86_clflush_size;
1527 4 : memset(&c->x86_capability, 0, sizeof(c->x86_capability));
1528 : #ifdef CONFIG_X86_VMX_FEATURE_NAMES
1529 4 : memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
1530 : #endif
1531 :
1532 4 : generic_identify(c);
1533 :
1534 4 : if (this_cpu->c_identify)
1535 0 : this_cpu->c_identify(c);
1536 :
1537 : /* Clear/Set all flags overridden by options, after probe */
1538 4 : apply_forced_caps(c);
1539 :
1540 : #ifdef CONFIG_X86_64
1541 4 : c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
1542 : #endif
1543 :
1544 : /*
1545 : * Vendor-specific initialization. In this section we
1546 : * canonicalize the feature flags: if the CPU supports
1547 : * features that CPUID doesn't report, if CPUID claims
1548 : * incorrect flags, or if there are other bugs, we handle
1549 : * them here.
1550 : *
1551 : * At the end of this section, c->x86_capability better
1552 : * indicate the features this CPU genuinely supports!
1553 : */
1554 4 : if (this_cpu->c_init)
1555 4 : this_cpu->c_init(c);
1556 :
1557 : /* Disable the PN if appropriate */
1558 4 : squash_the_stupid_serial_number(c);
1559 :
1560 : /* Set up SMEP/SMAP/UMIP */
1561 4 : setup_smep(c);
1562 4 : setup_smap(c);
1563 4 : setup_umip(c);
1564 :
1565 : /* Enable FSGSBASE instructions if available. */
1566 4 : if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
1567 4 : cr4_set_bits(X86_CR4_FSGSBASE);
1568 4 : elf_hwcap2 |= HWCAP2_FSGSBASE;
1569 : }
1570 :
1571 : /*
1572 : * The vendor-specific functions might have changed features.
1573 : * Now we do "generic changes."
1574 : */
1575 :
1576 : /* Filter out anything that depends on CPUID levels we don't have */
1577 4 : filter_cpuid_features(c, true);
1578 :
1579 : /* If the model name is still unset, do table lookup. */
1580 4 : if (!c->x86_model_id[0]) {
1581 0 : const char *p;
1582 0 : p = table_lookup_model(c);
1583 0 : if (p)
1584 : strcpy(c->x86_model_id, p);
1585 : else
1586 : /* Last resort... */
1587 0 : sprintf(c->x86_model_id, "%02x/%02x",
1588 0 : c->x86, c->x86_model);
1589 : }
1590 :
1591 : #ifdef CONFIG_X86_64
1592 4 : detect_ht(c);
1593 : #endif
1594 :
1595 4 : x86_init_rdrand(c);
1596 4 : setup_pku(c);
1597 :
1598 : /*
1599 : * Clear/Set all flags overridden by options; this needs to be
1600 : * done before the SMP "AND" of all CPUs' capabilities below.
1601 : */
1602 4 : apply_forced_caps(c);
1603 :
1604 : /*
1605 : * On SMP, boot_cpu_data holds the common feature set between
1606 : * all CPUs; so make sure that we indicate which features are
1607 : * common between the CPUs. The first time this routine gets
1608 : * executed, c == &boot_cpu_data.
1609 : */
1610 4 : if (c != &boot_cpu_data) {
1611 : /* AND the already accumulated flags with these */
1612 63 : for (i = 0; i < NCAPINTS; i++)
1613 60 : boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1614 :
1615 : /* OR, i.e. replicate the bug flags */
1616 6 : for (i = NCAPINTS; i < NCAPINTS + NBUGINTS; i++)
1617 3 : c->x86_capability[i] |= boot_cpu_data.x86_capability[i];
1618 : }
1619 :
1620 : /* Init Machine Check Exception if available. */
1621 4 : mcheck_cpu_init(c);
1622 :
1623 4 : select_idle_routine(c);
1624 :
1625 : #ifdef CONFIG_NUMA
1626 4 : numa_add_cpu(smp_processor_id());
1627 : #endif
1628 4 : }
1629 :
1630 : /*
1631 : * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
1632 : * on 32-bit kernels:
1633 : */
1634 : #ifdef CONFIG_X86_32
1635 : void enable_sep_cpu(void)
1636 : {
1637 : struct tss_struct *tss;
1638 : int cpu;
1639 :
1640 : if (!boot_cpu_has(X86_FEATURE_SEP))
1641 : return;
1642 :
1643 : cpu = get_cpu();
1644 : tss = &per_cpu(cpu_tss_rw, cpu);
1645 :
1646 : /*
1647 : * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
1648 : * see the big comment in struct x86_hw_tss's definition.
1649 : */
1650 :
1651 : tss->x86_tss.ss1 = __KERNEL_CS;
1652 : wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
1653 : wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
1654 : wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
1655 :
1656 : put_cpu();
1657 : }
1658 : #endif
1659 :
1660 1 : void __init identify_boot_cpu(void)
1661 : {
1662 1 : identify_cpu(&boot_cpu_data);
1663 : #ifdef CONFIG_X86_32
1664 : sysenter_setup();
1665 : enable_sep_cpu();
1666 : #endif
1667 1 : cpu_detect_tlb(&boot_cpu_data);
1668 1 : setup_cr_pinning();
1669 :
1670 1 : tsx_init();
1671 1 : }
1672 :
1673 3 : void identify_secondary_cpu(struct cpuinfo_x86 *c)
1674 : {
1675 3 : BUG_ON(c == &boot_cpu_data);
1676 3 : identify_cpu(c);
1677 : #ifdef CONFIG_X86_32
1678 : enable_sep_cpu();
1679 : #endif
1680 3 : mtrr_ap_init();
1681 3 : validate_apic_and_package_id(c);
1682 3 : x86_spec_ctrl_setup_ap();
1683 3 : update_srbds_msr();
1684 3 : }
1685 :
1686 0 : static __init int setup_noclflush(char *arg)
1687 : {
1688 0 : setup_clear_cpu_cap(X86_FEATURE_CLFLUSH);
1689 0 : setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT);
1690 0 : return 1;
1691 : }
1692 : __setup("noclflush", setup_noclflush);
1693 :
1694 1 : void print_cpu_info(struct cpuinfo_x86 *c)
1695 : {
1696 1 : const char *vendor = NULL;
1697 :
1698 1 : if (c->x86_vendor < X86_VENDOR_NUM) {
1699 1 : vendor = this_cpu->c_vendor;
1700 : } else {
1701 0 : if (c->cpuid_level >= 0)
1702 0 : vendor = c->x86_vendor_id;
1703 : }
1704 :
1705 1 : if (vendor && !strstr(c->x86_model_id, vendor))
1706 0 : pr_cont("%s ", vendor);
1707 :
1708 1 : if (c->x86_model_id[0])
1709 1 : pr_cont("%s", c->x86_model_id);
1710 : else
1711 0 : pr_cont("%d86", c->x86);
1712 :
1713 1 : pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
1714 :
1715 1 : if (c->x86_stepping || c->cpuid_level >= 0)
1716 1 : pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
1717 : else
1718 0 : pr_cont(")\n");
1719 1 : }
1720 :
1721 : /*
1722 : * clearcpuid= was already parsed in cpu_parse_early_param().
1723 : * But we need to keep a dummy __setup around otherwise it would
1724 : * show up as an environment variable for init.
1725 : */
1726 0 : static __init int setup_clearcpuid(char *arg)
1727 : {
1728 0 : return 1;
1729 : }
1730 : __setup("clearcpuid=", setup_clearcpuid);
1731 :
1732 : #ifdef CONFIG_X86_64
1733 : DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
1734 : fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
1735 : EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
1736 :
1737 : /*
1738 : * The following percpu variables are hot. Align current_task to
1739 : * cacheline size such that they fall in the same cacheline.
1740 : */
1741 : DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
1742 : &init_task;
1743 : EXPORT_PER_CPU_SYMBOL(current_task);
1744 :
1745 : DEFINE_PER_CPU(void *, hardirq_stack_ptr);
1746 : DEFINE_PER_CPU(bool, hardirq_stack_inuse);
1747 :
1748 : DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1749 : EXPORT_PER_CPU_SYMBOL(__preempt_count);
1750 :
1751 : /* May not be marked __init: used by software suspend */
1752 4 : void syscall_init(void)
1753 : {
1754 4 : wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
1755 4 : wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
1756 :
1757 : #ifdef CONFIG_IA32_EMULATION
1758 4 : wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
1759 : /*
1760 : * This only works on Intel CPUs.
1761 : * On AMD CPUs these MSRs are 32-bit, so the CPU truncates MSR_IA32_SYSENTER_EIP.
1762 : * This does not cause SYSENTER to jump to the wrong location, because
1763 : * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
1764 : */
1765 4 : wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
1766 12 : wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
1767 4 : (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
1768 4 : wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
1769 : #else
1770 : wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
1771 : wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
1772 : wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
1773 : wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
1774 : #endif
1775 :
1776 : /* Flags to clear on syscall */
1777 4 : wrmsrl(MSR_SYSCALL_MASK,
1778 : X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
1779 : X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
1780 4 : }
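As a worked example of the MSR_STAR layout written at the top of syscall_init() -- __KERNEL_CS in bits 47:32 and __USER32_CS in bits 63:48 -- the CPU derives the SYSCALL selectors from the lower field and the SYSRET selectors from the upper one, which is why the user GDT entries must sit in that fixed order. A sketch only; decode_star() is a hypothetical helper, not kernel API.

static void decode_star(u64 star)
{
	u16 kernel_cs = (star >> 32) & 0xffff;	/* CS loaded by SYSCALL */
	u16 kernel_ss = kernel_cs + 8;		/* SS loaded by SYSCALL */
	u16 user_base = (star >> 48) & 0xffff;	/* base for SYSRET selectors */
	u16 user_ss   = user_base + 8;		/* SS for SYSRET (any mode) */
	u16 user64_cs = user_base + 16;		/* CS for 64-bit SYSRET */

	pr_debug("SYSCALL: CS=%#x SS=%#x; SYSRET64: CS=%#x SS=%#x\n",
		 kernel_cs, kernel_ss, user64_cs, user_ss);
}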
1781 :
1782 : #else /* CONFIG_X86_64 */
1783 :
1784 : DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1785 : EXPORT_PER_CPU_SYMBOL(current_task);
1786 : DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1787 : EXPORT_PER_CPU_SYMBOL(__preempt_count);
1788 :
1789 : /*
1790 : * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
1791 : * the top of the kernel stack. Use an extra percpu variable to track the
1792 : * top of the kernel stack directly.
1793 : */
1794 : DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
1795 : (unsigned long)&init_thread_union + THREAD_SIZE;
1796 : EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
1797 :
1798 : #ifdef CONFIG_STACKPROTECTOR
1799 : DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
1800 : #endif
1801 :
1802 : #endif /* CONFIG_X86_64 */
1803 :
1804 : /*
1805 : * Clear all 6 debug registers:
1806 : */
1807 4 : static void clear_all_debug_regs(void)
1808 : {
1809 4 : int i;
1810 :
1811 36 : for (i = 0; i < 8; i++) {
1812 : /* Ignore db4, db5 */
1813 32 : if ((i == 4) || (i == 5))
1814 8 : continue;
1815 :
1816 56 : set_debugreg(0, i);
1817 : }
1818 4 : }
1819 :
1820 : #ifdef CONFIG_KGDB
1821 : /*
1822 : * Restore debug regs if using kgdbwait and you have a kernel debugger
1823 : * connection established.
1824 : */
1825 : static void dbg_restore_debug_regs(void)
1826 : {
1827 : if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break))
1828 : arch_kgdb_ops.correct_hw_break();
1829 : }
1830 : #else /* ! CONFIG_KGDB */
1831 : #define dbg_restore_debug_regs()
1832 : #endif /* ! CONFIG_KGDB */
1833 :
1834 4 : static void wait_for_master_cpu(int cpu)
1835 : {
1836 : #ifdef CONFIG_SMP
1837 : /*
1838 : * wait for ACK from master CPU before continuing
1839 : * with AP initialization
1840 : */
1841 4 : WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
1842 959 : while (!cpumask_test_cpu(cpu, cpu_callout_mask))
1843 955 : cpu_relax();
1844 : #endif
1845 4 : }
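The other half of this handshake runs on the boot CPU in arch/x86/kernel/smpboot.c: it waits for the AP to mark itself in cpu_initialized_mask and then ACKs it via cpu_callout_mask. Roughly, as a simplified sketch with timeouts and error handling omitted (release_ap() is a hypothetical name):

static void release_ap(int cpu)
{
	/* Wait until the AP has announced itself ... */
	while (!cpumask_test_cpu(cpu, cpu_initialized_mask))
		cpu_relax();

	/* ... then ACK it so that wait_for_master_cpu() above can return. */
	cpumask_set_cpu(cpu, cpu_callout_mask);
}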
1846 :
1847 : #ifdef CONFIG_X86_64
1848 7 : static inline void setup_getcpu(int cpu)
1849 : {
1850 7 : unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
1851 7 : struct desc_struct d = { };
1852 :
1853 7 : if (boot_cpu_has(X86_FEATURE_RDTSCP))
1854 7 : write_rdtscp_aux(cpudata);
1855 :
1856 : /* Store CPU and node number in limit. */
1857 7 : d.limit0 = cpudata;
1858 7 : d.limit1 = cpudata >> 16;
1859 :
1860 7 : d.type = 5; /* RO data, expand down, accessed */
1861 7 : d.dpl = 3; /* Visible to user code */
1862 7 : d.s = 1; /* Not a system segment */
1863 7 : d.p = 1; /* Present */
1864 7 : d.d = 1; /* 32-bit */
1865 :
1866 7 : write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
1867 7 : }
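To illustrate what this descriptor is for: the vDSO getcpu path recovers the values with an LSL on the CPUNODE selector and splits the segment limit back into CPU and node. The sketch below assumes the usual 12-bit CPU field of vdso_encode_cpunode(); read_cpunode() is a hypothetical helper, not the kernel's own accessor.

static inline void read_cpunode(unsigned int *cpu, unsigned int *node)
{
	unsigned long p;

	/* LSL returns the segment limit, i.e. the encoded cpu/node value. */
	asm("lsl %[seg], %[p]" : [p] "=a" (p) : [seg] "r" ((u32)__CPUNODE_SEG));

	*cpu  = p & 0xfff;	/* low 12 bits: CPU number */
	*node = p >> 12;	/* remaining bits: NUMA node */
}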
1868 :
1869 4 : static inline void ucode_cpu_init(int cpu)
1870 : {
1871 4 : if (cpu)
1872 : load_ucode_ap();
1873 : }
1874 :
1875 7 : static inline void tss_setup_ist(struct tss_struct *tss)
1876 : {
1877 : /* Set up the per-CPU TSS IST stacks */
1878 7 : tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
1879 7 : tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
1880 7 : tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
1881 7 : tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
1882 : /* Only mapped when SEV-ES is active */
1883 7 : tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
1884 7 : }
1885 :
1886 : #else /* CONFIG_X86_64 */
1887 :
1888 : static inline void setup_getcpu(int cpu) { }
1889 :
1890 : static inline void ucode_cpu_init(int cpu)
1891 : {
1892 : show_ucode_info_early();
1893 : }
1894 :
1895 : static inline void tss_setup_ist(struct tss_struct *tss) { }
1896 :
1897 : #endif /* !CONFIG_X86_64 */
1898 :
1899 7 : static inline void tss_setup_io_bitmap(struct tss_struct *tss)
1900 : {
1901 7 : tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
1902 :
1903 : #ifdef CONFIG_X86_IOPL_IOPERM
1904 : tss->io_bitmap.prev_max = 0;
1905 : tss->io_bitmap.prev_sequence = 0;
1906 : memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
1907 : /*
1908 : * Invalidate the extra array entry past the end of the all-permission
1909 : * (mapall) bitmap, as required by the hardware.
1910 : */
1911 : tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
1912 : #endif
1913 : }
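Conceptually, io_bitmap_base feeds a hardware check along the lines of the model below: a CPL-3 port access with insufficient IOPL consults one bit per port at that offset inside the TSS, so parking the base beyond the TSS limit (IO_BITMAP_OFFSET_INVALID) makes every such access raise #GP. io_allowed() is a hypothetical model of the hardware behaviour, not kernel code.

static bool io_allowed(const u8 *tss, u32 tss_limit, u16 io_base, u16 port)
{
	u32 byte = io_base + port / 8;

	/* Bitmap byte outside the TSS limit: the access always faults. */
	if (byte > tss_limit)
		return false;

	/* A clear bit permits the port, a set bit raises #GP. */
	return !(tss[byte] & (1U << (port % 8)));
}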
1914 :
1915 : /*
1916 : * Setup everything needed to handle exceptions from the IDT, including the IST
1917 : * exceptions which use paranoid_entry().
1918 : */
1919 3 : void cpu_init_exception_handling(void)
1920 : {
1921 3 : struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
1922 3 : int cpu = raw_smp_processor_id();
1923 :
1924 : /* paranoid_entry() gets the CPU number from the GDT */
1925 3 : setup_getcpu(cpu);
1926 :
1927 : /* IST vectors need TSS to be set up. */
1928 3 : tss_setup_ist(tss);
1929 3 : tss_setup_io_bitmap(tss);
1930 3 : set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1931 :
1932 3 : load_TR_desc();
1933 :
1934 : /* Finally load the IDT */
1935 3 : load_current_idt();
1936 3 : }
1937 :
1938 : /*
1939 : * cpu_init() initializes state that is per-CPU. Some data is already
1940 : * initialized (naturally) in the bootstrap process, such as the GDT
1941 : * and IDT. We reload them nevertheless; this function acts as a
1942 : * 'CPU state barrier', and nothing should get across.
1943 : */
1944 4 : void cpu_init(void)
1945 : {
1946 4 : struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
1947 4 : struct task_struct *cur = current;
1948 4 : int cpu = raw_smp_processor_id();
1949 :
1950 4 : wait_for_master_cpu(cpu);
1951 :
1952 4 : ucode_cpu_init(cpu);
1953 :
1954 : #ifdef CONFIG_NUMA
1955 4 : if (this_cpu_read(numa_node) == 0 &&
1956 4 : early_cpu_to_node(cpu) != NUMA_NO_NODE)
1957 4 : set_numa_node(early_cpu_to_node(cpu));
1958 : #endif
1959 4 : setup_getcpu(cpu);
1960 :
1961 4 : pr_debug("Initializing CPU#%d\n", cpu);
1962 :
1963 4 : if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
1964 : boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
1965 4 : cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1966 :
1967 : /*
1968 : * Initialize the per-CPU GDT with the boot GDT,
1969 : * and set up the GDT descriptor:
1970 : */
1971 4 : switch_to_new_gdt(cpu);
1972 4 : load_current_idt();
1973 :
1974 4 : if (IS_ENABLED(CONFIG_X86_64)) {
1975 4 : loadsegment(fs, 0);
1976 4 : memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
1977 4 : syscall_init();
1978 :
1979 4 : wrmsrl(MSR_FS_BASE, 0);
1980 4 : wrmsrl(MSR_KERNEL_GS_BASE, 0);
1981 4 : barrier();
1982 :
1983 4 : x2apic_setup();
1984 : }
1985 :
1986 4 : mmgrab(&init_mm);
1987 4 : cur->active_mm = &init_mm;
1988 4 : BUG_ON(cur->mm);
1989 4 : initialize_tlbstate_and_flush();
1990 4 : enter_lazy_tlb(&init_mm, cur);
1991 :
1992 : /* Initialize the TSS. */
1993 4 : tss_setup_ist(tss);
1994 4 : tss_setup_io_bitmap(tss);
1995 4 : set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
1996 :
1997 4 : load_TR_desc();
1998 : /*
1999 : * sp0 points to the entry trampoline stack regardless of what task
2000 : * is running.
2001 : */
2002 4 : load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
2003 :
2004 4 : load_mm_ldt(&init_mm);
2005 :
2006 4 : clear_all_debug_regs();
2007 4 : dbg_restore_debug_regs();
2008 :
2009 4 : doublefault_init_cpu_tss();
2010 :
2011 4 : fpu__init_cpu();
2012 :
2013 4 : if (is_uv_system())
2014 : uv_cpu_init();
2015 :
2016 4 : load_fixmap_gdt(cpu);
2017 4 : }
2018 :
2019 : /*
2020 : * The microcode loader calls this upon late microcode load to recheck features,
2021 : * but only when the microcode has actually been updated. The caller holds
2022 : * microcode_mutex and the CPU hotplug lock.
2023 : */
2024 0 : void microcode_check(void)
2025 : {
2026 0 : struct cpuinfo_x86 info;
2027 :
2028 0 : perf_check_microcode();
2029 :
2030 : /* Reload CPUID max function as it might've changed. */
2031 0 : info.cpuid_level = cpuid_eax(0);
2032 :
2033 : /*
2034 : * Copy all capability leaves to pick up the synthetic ones so that
2035 : * memcmp() below doesn't fail on that. The ones coming from CPUID will
2036 : * get overwritten in get_cpu_cap().
2037 : */
2038 0 : memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
2039 :
2040 0 : get_cpu_cap(&info);
2041 :
2042 0 : if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
2043 0 : return;
2044 :
2045 0 : pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
2046 0 : pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
2047 : }
2048 :
2049 : /*
2050 : * Invoked from core CPU hotplug code after hotplug operations
2051 : */
2052 4 : void arch_smt_update(void)
2053 : {
2054 : /* Handle the speculative execution misfeatures */
2055 4 : cpu_bugs_smt_update();
2056 : /* Check whether IPI broadcasting can be enabled */
2057 4 : apic_smt_update();
2058 4 : }
|