LCOV - code coverage report
Current view: top level - arch/x86/kernel - process.c (source / functions)
Test:         landlock.info
Date:         2021-04-22 12:43:58

                    Hit    Total   Coverage
Lines:              125      349     35.8 %
Functions:           17       36     47.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
       3             : 
       4             : #include <linux/errno.h>
       5             : #include <linux/kernel.h>
       6             : #include <linux/mm.h>
       7             : #include <linux/smp.h>
       8             : #include <linux/prctl.h>
       9             : #include <linux/slab.h>
      10             : #include <linux/sched.h>
      11             : #include <linux/sched/idle.h>
      12             : #include <linux/sched/debug.h>
      13             : #include <linux/sched/task.h>
      14             : #include <linux/sched/task_stack.h>
      15             : #include <linux/init.h>
      16             : #include <linux/export.h>
      17             : #include <linux/pm.h>
      18             : #include <linux/tick.h>
      19             : #include <linux/random.h>
      20             : #include <linux/user-return-notifier.h>
      21             : #include <linux/dmi.h>
      22             : #include <linux/utsname.h>
      23             : #include <linux/stackprotector.h>
      24             : #include <linux/cpuidle.h>
      25             : #include <linux/acpi.h>
      26             : #include <linux/elf-randomize.h>
      27             : #include <trace/events/power.h>
      28             : #include <linux/hw_breakpoint.h>
      29             : #include <asm/cpu.h>
      30             : #include <asm/apic.h>
      31             : #include <linux/uaccess.h>
      32             : #include <asm/mwait.h>
      33             : #include <asm/fpu/internal.h>
      34             : #include <asm/debugreg.h>
      35             : #include <asm/nmi.h>
      36             : #include <asm/tlbflush.h>
      37             : #include <asm/mce.h>
      38             : #include <asm/vm86.h>
      39             : #include <asm/switch_to.h>
      40             : #include <asm/desc.h>
      41             : #include <asm/prctl.h>
      42             : #include <asm/spec-ctrl.h>
      43             : #include <asm/io_bitmap.h>
      44             : #include <asm/proto.h>
      45             : #include <asm/frame.h>
      46             : 
      47             : #include "process.h"
      48             : 
      49             : /*
      50             :  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
      51             :  * no more per-task TSS's. The TSS size is kept cacheline-aligned
      52             :  * so they are allowed to end up in the .data..cacheline_aligned
      53             :  * section. Since TSS's are completely CPU-local, we want them
      54             :  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
      55             :  */
      56             : __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
      57             :         .x86_tss = {
      58             :                 /*
      59             :                  * .sp0 is only used when entering ring 0 from a lower
      60             :                  * privilege level.  Since the init task never runs anything
      61             :                  * but ring 0 code, there is no need for a valid value here.
      62             :                  * Poison it.
      63             :                  */
      64             :                 .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
      65             : 
      66             :                 /*
      67             :                  * .sp1 is cpu_current_top_of_stack.  The init task never
      68             :                  * runs user code, but cpu_current_top_of_stack should still
      69             :                  * be well defined before the first context switch.
      70             :                  */
      71             :                 .sp1 = TOP_OF_INIT_STACK,
      72             : 
      73             : #ifdef CONFIG_X86_32
      74             :                 .ss0 = __KERNEL_DS,
      75             :                 .ss1 = __KERNEL_CS,
      76             : #endif
      77             :                 .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
      78             :          },
      79             : };
      80             : EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
      81             : 
      82             : DEFINE_PER_CPU(bool, __tss_limit_invalid);
      83             : EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
      84             : 
      85             : /*
      86             :  * this gets called so that we can store lazy state into memory and copy the
      87             :  * current task into the new thread.
      88             :  */
      89        2616 : int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
      90             : {
      91        2616 :         memcpy(dst, src, arch_task_struct_size);
      92             : #ifdef CONFIG_VM86
      93             :         dst->thread.vm86 = NULL;
      94             : #endif
      95             : 
      96        2616 :         return fpu__copy(dst, src);
      97             : }
      98             : 
      99             : /*
      100             :  * Free thread data structures etc.
     101             :  */
     102        2539 : void exit_thread(struct task_struct *tsk)
     103             : {
     104        2539 :         struct thread_struct *t = &tsk->thread;
     105        2539 :         struct fpu *fpu = &t->fpu;
     106             : 
     107        2539 :         if (test_thread_flag(TIF_IO_BITMAP))
     108        2539 :                 io_bitmap_exit(tsk);
     109             : 
     110        2539 :         free_vm86(t);
     111             : 
     112        2539 :         fpu__drop(fpu);
     113        2539 : }
     114             : 
     115           6 : static int set_new_tls(struct task_struct *p, unsigned long tls)
     116             : {
     117           6 :         struct user_desc __user *utls = (struct user_desc __user *)tls;
     118             : 
     119           6 :         if (in_ia32_syscall())
     120           0 :                 return do_set_thread_area(p, -1, utls, 0);
     121             :         else
     122           6 :                 return do_set_thread_area_64(p, ARCH_SET_FS, tls);
     123             : }
     124             : 
     125        2616 : int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
     126             :                 struct task_struct *p, unsigned long tls)
     127             : {
     128        2616 :         struct inactive_task_frame *frame;
     129        2616 :         struct fork_frame *fork_frame;
     130        2616 :         struct pt_regs *childregs;
     131        2616 :         int ret = 0;
     132             : 
     133        2616 :         childregs = task_pt_regs(p);
     134        2616 :         fork_frame = container_of(childregs, struct fork_frame, regs);
     135        2616 :         frame = &fork_frame->frame;
     136             : 
     137        2616 :         frame->bp = encode_frame_pointer(childregs);
     138        2616 :         frame->ret_addr = (unsigned long) ret_from_fork;
     139        2616 :         p->thread.sp = (unsigned long) fork_frame;
     140        2616 :         p->thread.io_bitmap = NULL;
     141        2616 :         memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
     142             : 
     143             : #ifdef CONFIG_X86_64
     144        2616 :         current_save_fsgs();
     145        2616 :         p->thread.fsindex = current->thread.fsindex;
     146        2616 :         p->thread.fsbase = current->thread.fsbase;
     147        2616 :         p->thread.gsindex = current->thread.gsindex;
     148        2616 :         p->thread.gsbase = current->thread.gsbase;
     149             : 
     150        2616 :         savesegment(es, p->thread.es);
     151        2616 :         savesegment(ds, p->thread.ds);
     152             : #else
     153             :         p->thread.sp0 = (unsigned long) (childregs + 1);
     154             :         /*
      155             :          * Clear all status flags including IF and set the fixed bit. 64-bit
      156             :          * does not need this initialization as the frame does not contain
      157             :          * flags. There the flags consistency (especially vs. AC) is ensured
      158             :          * via objtool, which lacks 32-bit support.
     159             :          */
     160             :         frame->flags = X86_EFLAGS_FIXED;
     161             : #endif
     162             : 
      163             :         /* Kernel thread? */
     164        2616 :         if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
     165          53 :                 memset(childregs, 0, sizeof(struct pt_regs));
     166          53 :                 kthread_frame_init(frame, sp, arg);
     167          53 :                 return 0;
     168             :         }
     169             : 
     170        2563 :         frame->bx = 0;
     171        2563 :         *childregs = *current_pt_regs();
     172        2563 :         childregs->ax = 0;
     173        2563 :         if (sp)
     174           6 :                 childregs->sp = sp;
     175             : 
     176             : #ifdef CONFIG_X86_32
     177             :         task_user_gs(p) = get_user_gs(current_pt_regs());
     178             : #endif
     179             : 
     180             :         /* Set a new TLS for the child thread? */
     181        2563 :         if (clone_flags & CLONE_SETTLS)
     182           6 :                 ret = set_new_tls(p, tls);
     183             : 
     184        2563 :         if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
     185        2616 :                 io_bitmap_share(p);
     186             : 
     187             :         return ret;
     188             : }
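
The childregs->ax = 0 assignment above is what makes fork() return 0 in the child: the child's
saved user registers are a copy of the parent's (*childregs = *current_pt_regs()), with only the
syscall return-value register cleared. A minimal userspace sketch, not part of this file, that
observes the effect:

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {
			/* Child: the duplicated pt_regs had ->ax cleared, so fork() yields 0. */
			printf("child: fork() returned 0\n");
			_exit(0);
		}
		/* Parent: the normal syscall return path supplies the child's PID. */
		printf("parent: fork() returned %d\n", pid);
		waitpid(pid, NULL, 0);
		return 0;
	}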
     189             : 
     190        2251 : void flush_thread(void)
     191             : {
     192        2251 :         struct task_struct *tsk = current;
     193             : 
     194        2251 :         flush_ptrace_hw_breakpoint(tsk);
     195        2251 :         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
     196             : 
     197        2251 :         fpu__clear_all(&tsk->thread.fpu);
     198        2251 : }
     199             : 
     200           0 : void disable_TSC(void)
     201             : {
     202           0 :         preempt_disable();
     203           0 :         if (!test_and_set_thread_flag(TIF_NOTSC))
     204             :                 /*
     205             :                  * Must flip the CPU state synchronously with
     206             :                  * TIF_NOTSC in the current running context.
     207             :                  */
     208           0 :                 cr4_set_bits(X86_CR4_TSD);
     209           0 :         preempt_enable();
     210           0 : }
     211             : 
     212           0 : static void enable_TSC(void)
     213             : {
     214           0 :         preempt_disable();
     215           0 :         if (test_and_clear_thread_flag(TIF_NOTSC))
     216             :                 /*
     217             :                  * Must flip the CPU state synchronously with
     218             :                  * TIF_NOTSC in the current running context.
     219             :                  */
     220           0 :                 cr4_clear_bits(X86_CR4_TSD);
     221           0 :         preempt_enable();
     222           0 : }
     223             : 
     224           0 : int get_tsc_mode(unsigned long adr)
     225             : {
     226           0 :         unsigned int val;
     227             : 
     228           0 :         if (test_thread_flag(TIF_NOTSC))
     229             :                 val = PR_TSC_SIGSEGV;
     230             :         else
     231           0 :                 val = PR_TSC_ENABLE;
     232             : 
     233           0 :         return put_user(val, (unsigned int __user *)adr);
     234             : }
     235             : 
     236           0 : int set_tsc_mode(unsigned int val)
     237             : {
     238           0 :         if (val == PR_TSC_SIGSEGV)
     239           0 :                 disable_TSC();
     240           0 :         else if (val == PR_TSC_ENABLE)
     241           0 :                 enable_TSC();
     242             :         else
     243             :                 return -EINVAL;
     244             : 
     245             :         return 0;
     246             : }
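
get_tsc_mode() and set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl() options. A minimal
userspace sketch, assuming a toolchain whose <sys/prctl.h> exposes the PR_TSC_* constants:

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		int mode = 0;

		/* Query this task's TSC mode; the kernel side is get_tsc_mode() above. */
		if (prctl(PR_GET_TSC, &mode) == 0)
			printf("TSC mode: %s\n",
			       mode == PR_TSC_ENABLE ? "enabled" : "faults with SIGSEGV");

		/* Request SIGSEGV on RDTSC: set_tsc_mode() sets TIF_NOTSC and CR4.TSD. */
		if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) != 0)
			perror("PR_SET_TSC");
		return 0;
	}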
     247             : 
     248             : DEFINE_PER_CPU(u64, msr_misc_features_shadow);
     249             : 
     250           0 : static void set_cpuid_faulting(bool on)
     251             : {
     252           0 :         u64 msrval;
     253             : 
     254           0 :         msrval = this_cpu_read(msr_misc_features_shadow);
     255           0 :         msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
     256           0 :         msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
     257           0 :         this_cpu_write(msr_misc_features_shadow, msrval);
     258           0 :         wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
     259           0 : }
     260             : 
     261           0 : static void disable_cpuid(void)
     262             : {
     263           0 :         preempt_disable();
     264           0 :         if (!test_and_set_thread_flag(TIF_NOCPUID)) {
     265             :                 /*
     266             :                  * Must flip the CPU state synchronously with
     267             :                  * TIF_NOCPUID in the current running context.
     268             :                  */
     269           0 :                 set_cpuid_faulting(true);
     270             :         }
     271           0 :         preempt_enable();
     272           0 : }
     273             : 
     274           0 : static void enable_cpuid(void)
     275             : {
     276           0 :         preempt_disable();
     277           0 :         if (test_and_clear_thread_flag(TIF_NOCPUID)) {
     278             :                 /*
     279             :                  * Must flip the CPU state synchronously with
     280             :                  * TIF_NOCPUID in the current running context.
     281             :                  */
     282           0 :                 set_cpuid_faulting(false);
     283             :         }
     284           0 :         preempt_enable();
     285           0 : }
     286             : 
     287           0 : static int get_cpuid_mode(void)
     288             : {
     289           0 :         return !test_thread_flag(TIF_NOCPUID);
     290             : }
     291             : 
     292           0 : static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
     293             : {
     294           0 :         if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
     295             :                 return -ENODEV;
     296             : 
     297           0 :         if (cpuid_enabled)
     298           0 :                 enable_cpuid();
     299             :         else
     300           0 :                 disable_cpuid();
     301             : 
     302             :         return 0;
     303             : }
     304             : 
     305             : /*
     306             :  * Called immediately after a successful exec.
     307             :  */
     308        2251 : void arch_setup_new_exec(void)
     309             : {
     310             :         /* If cpuid was previously disabled for this task, re-enable it. */
     311        2251 :         if (test_thread_flag(TIF_NOCPUID))
     312           0 :                 enable_cpuid();
     313             : 
     314             :         /*
     315             :          * Don't inherit TIF_SSBD across exec boundary when
     316             :          * PR_SPEC_DISABLE_NOEXEC is used.
     317             :          */
     318        2251 :         if (test_thread_flag(TIF_SSBD) &&
     319           0 :             task_spec_ssb_noexec(current)) {
     320           0 :                 clear_thread_flag(TIF_SSBD);
     321           0 :                 task_clear_spec_ssb_disable(current);
     322           0 :                 task_clear_spec_ssb_noexec(current);
     323           0 :                 speculation_ctrl_update(task_thread_info(current)->flags);
     324             :         }
     325        2251 : }
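
The TIF_SSBD handling above implements the PR_SPEC_DISABLE_NOEXEC semantics: the mitigation stays
on for the current program but is dropped at the next execve(). A hedged userspace sketch (on older
toolchains the PR_SPEC_* constants live in <linux/prctl.h>):

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		/*
		 * Disable speculative store bypass for this task, but only until the
		 * next execve(): arch_setup_new_exec() above clears TIF_SSBD for the
		 * PR_SPEC_DISABLE_NOEXEC case.
		 */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			  PR_SPEC_DISABLE_NOEXEC, 0, 0) != 0)
			perror("PR_SET_SPECULATION_CTRL");
		return 0;
	}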
     326             : 
     327             : #ifdef CONFIG_X86_IOPL_IOPERM
     328             : static inline void switch_to_bitmap(unsigned long tifp)
     329             : {
     330             :         /*
     331             :          * Invalidate I/O bitmap if the previous task used it. This prevents
     332             :          * any possible leakage of an active I/O bitmap.
     333             :          *
     334             :          * If the next task has an I/O bitmap it will handle it on exit to
     335             :          * user mode.
     336             :          */
     337             :         if (tifp & _TIF_IO_BITMAP)
     338             :                 tss_invalidate_io_bitmap();
     339             : }
     340             : 
     341             : static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
     342             : {
     343             :         /*
      344             :          * Copy at least the byte range of the incoming task's bitmap which
     345             :          * covers the permitted I/O ports.
     346             :          *
     347             :          * If the previous task which used an I/O bitmap had more bits
     348             :          * permitted, then the copy needs to cover those as well so they
     349             :          * get turned off.
     350             :          */
     351             :         memcpy(tss->io_bitmap.bitmap, iobm->bitmap,
     352             :                max(tss->io_bitmap.prev_max, iobm->max));
     353             : 
     354             :         /*
     355             :          * Store the new max and the sequence number of this bitmap
     356             :          * and a pointer to the bitmap itself.
     357             :          */
     358             :         tss->io_bitmap.prev_max = iobm->max;
     359             :         tss->io_bitmap.prev_sequence = iobm->sequence;
     360             : }
     361             : 
     362             : /**
      363             :  * native_tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
     364             :  */
     365             : void native_tss_update_io_bitmap(void)
     366             : {
     367             :         struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
     368             :         struct thread_struct *t = &current->thread;
     369             :         u16 *base = &tss->x86_tss.io_bitmap_base;
     370             : 
     371             :         if (!test_thread_flag(TIF_IO_BITMAP)) {
     372             :                 native_tss_invalidate_io_bitmap();
     373             :                 return;
     374             :         }
     375             : 
     376             :         if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) {
     377             :                 *base = IO_BITMAP_OFFSET_VALID_ALL;
     378             :         } else {
     379             :                 struct io_bitmap *iobm = t->io_bitmap;
     380             : 
     381             :                 /*
     382             :                  * Only copy bitmap data when the sequence number differs. The
     383             :                  * update time is accounted to the incoming task.
     384             :                  */
     385             :                 if (tss->io_bitmap.prev_sequence != iobm->sequence)
     386             :                         tss_copy_io_bitmap(tss, iobm);
     387             : 
     388             :                 /* Enable the bitmap */
     389             :                 *base = IO_BITMAP_OFFSET_VALID_MAP;
     390             :         }
     391             : 
     392             :         /*
     393             :          * Make sure that the TSS limit is covering the IO bitmap. It might have
     394             :          * been cut down by a VMEXIT to 0x67 which would cause a subsequent I/O
      395             :          * access from user space to trigger a #GP because the bitmap is outside
     396             :          * the TSS limit.
     397             :          */
     398             :         refresh_tss_limit();
     399             : }
     400             : #else /* CONFIG_X86_IOPL_IOPERM */
     401           0 : static inline void switch_to_bitmap(unsigned long tifp) { }
     402             : #endif
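
The per-task I/O bitmap managed above is created by the ioperm(2)/iopl(2) syscalls;
native_tss_update_io_bitmap() copies it into the CPU's TSS on the way back to user mode. A minimal
privileged userspace sketch (port 0x378 is just an example, and CAP_SYS_RAWIO is required):

	#include <stdio.h>
	#include <sys/io.h>	/* ioperm(), outb(); x86-specific glibc header */

	int main(void)
	{
		/* Request access to three ports starting at 0x378. */
		if (ioperm(0x378, 3, 1) != 0) {
			perror("ioperm");
			return 1;
		}
		outb(0x00, 0x378);	/* direct port write, checked against the TSS bitmap */
		ioperm(0x378, 3, 0);	/* drop the permission again */
		return 0;
	}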
     403             : 
     404             : #ifdef CONFIG_SMP
     405             : 
     406             : struct ssb_state {
     407             :         struct ssb_state        *shared_state;
     408             :         raw_spinlock_t          lock;
     409             :         unsigned int            disable_state;
     410             :         unsigned long           local_state;
     411             : };
     412             : 
     413             : #define LSTATE_SSB      0
     414             : 
     415             : static DEFINE_PER_CPU(struct ssb_state, ssb_state);
     416             : 
     417           4 : void speculative_store_bypass_ht_init(void)
     418             : {
     419           4 :         struct ssb_state *st = this_cpu_ptr(&ssb_state);
     420           4 :         unsigned int this_cpu = smp_processor_id();
     421           4 :         unsigned int cpu;
     422             : 
     423           4 :         st->local_state = 0;
     424             : 
     425             :         /*
     426             :          * Shared state setup happens once on the first bringup
     427             :          * of the CPU. It's not destroyed on CPU hotunplug.
     428             :          */
     429           4 :         if (st->shared_state)
     430             :                 return;
     431             : 
     432           4 :         raw_spin_lock_init(&st->lock);
     433             : 
     434             :         /*
     435             :          * Go over HT siblings and check whether one of them has set up the
     436             :          * shared state pointer already.
     437             :          */
     438          12 :         for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
     439           4 :                 if (cpu == this_cpu)
     440           4 :                         continue;
     441             : 
     442           0 :                 if (!per_cpu(ssb_state, cpu).shared_state)
     443           0 :                         continue;
     444             : 
     445             :                 /* Link it to the state of the sibling: */
     446           0 :                 st->shared_state = per_cpu(ssb_state, cpu).shared_state;
     447           0 :                 return;
     448             :         }
     449             : 
     450             :         /*
     451             :          * First HT sibling to come up on the core.  Link shared state of
     452             :          * the first HT sibling to itself. The siblings on the same core
     453             :          * which come up later will see the shared state pointer and link
      454             :          * themselves to the state of this CPU.
     455             :          */
     456           4 :         st->shared_state = st;
     457             : }
     458             : 
     459             : /*
     460             :  * Logic is: First HT sibling enables SSBD for both siblings in the core
      461             :  * and the last sibling to disable it disables it for the whole core. This is how
     462             :  * MSR_SPEC_CTRL works in "hardware":
     463             :  *
     464             :  *  CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
     465             :  */
     466           0 : static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
     467             : {
     468           0 :         struct ssb_state *st = this_cpu_ptr(&ssb_state);
     469           0 :         u64 msr = x86_amd_ls_cfg_base;
     470             : 
     471           0 :         if (!static_cpu_has(X86_FEATURE_ZEN)) {
     472           0 :                 msr |= ssbd_tif_to_amd_ls_cfg(tifn);
     473           0 :                 wrmsrl(MSR_AMD64_LS_CFG, msr);
     474             :                 return;
     475             :         }
     476             : 
     477           0 :         if (tifn & _TIF_SSBD) {
     478             :                 /*
     479             :                  * Since this can race with prctl(), block reentry on the
     480             :                  * same CPU.
     481             :                  */
     482           0 :                 if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
     483             :                         return;
     484             : 
     485           0 :                 msr |= x86_amd_ls_cfg_ssbd_mask;
     486             : 
     487           0 :                 raw_spin_lock(&st->shared_state->lock);
     488             :                 /* First sibling enables SSBD: */
     489           0 :                 if (!st->shared_state->disable_state)
     490           0 :                         wrmsrl(MSR_AMD64_LS_CFG, msr);
     491           0 :                 st->shared_state->disable_state++;
     492           0 :                 raw_spin_unlock(&st->shared_state->lock);
     493             :         } else {
     494           0 :                 if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
     495             :                         return;
     496             : 
     497           0 :                 raw_spin_lock(&st->shared_state->lock);
     498           0 :                 st->shared_state->disable_state--;
     499           0 :                 if (!st->shared_state->disable_state)
     500           0 :                         wrmsrl(MSR_AMD64_LS_CFG, msr);
     501           0 :                 raw_spin_unlock(&st->shared_state->lock);
     502             :         }
     503             : }
     504             : #else
     505             : static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
     506             : {
     507             :         u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
     508             : 
     509             :         wrmsrl(MSR_AMD64_LS_CFG, msr);
     510             : }
     511             : #endif
     512             : 
     513           0 : static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
     514             : {
     515             :         /*
     516             :          * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
     517             :          * so ssbd_tif_to_spec_ctrl() just works.
     518             :          */
     519           0 :         wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
     520             : }
     521             : 
     522             : /*
     523             :  * Update the MSRs managing speculation control, during context switch.
     524             :  *
     525             :  * tifp: Previous task's thread flags
     526             :  * tifn: Next task's thread flags
     527             :  */
     528           0 : static __always_inline void __speculation_ctrl_update(unsigned long tifp,
     529             :                                                       unsigned long tifn)
     530             : {
     531           0 :         unsigned long tif_diff = tifp ^ tifn;
     532           0 :         u64 msr = x86_spec_ctrl_base;
     533           0 :         bool updmsr = false;
     534             : 
     535           0 :         lockdep_assert_irqs_disabled();
     536             : 
     537             :         /* Handle change of TIF_SSBD depending on the mitigation method. */
     538           0 :         if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
     539           0 :                 if (tif_diff & _TIF_SSBD)
     540           0 :                         amd_set_ssb_virt_state(tifn);
     541           0 :         } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
     542           0 :                 if (tif_diff & _TIF_SSBD)
     543           0 :                         amd_set_core_ssb_state(tifn);
     544           0 :         } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
     545           0 :                    static_cpu_has(X86_FEATURE_AMD_SSBD)) {
     546           0 :                 updmsr |= !!(tif_diff & _TIF_SSBD);
     547           0 :                 msr |= ssbd_tif_to_spec_ctrl(tifn);
     548             :         }
     549             : 
     550             :         /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */
     551           0 :         if (IS_ENABLED(CONFIG_SMP) &&
     552           0 :             static_branch_unlikely(&switch_to_cond_stibp)) {
     553           0 :                 updmsr |= !!(tif_diff & _TIF_SPEC_IB);
     554           0 :                 msr |= stibp_tif_to_spec_ctrl(tifn);
     555             :         }
     556             : 
     557           0 :         if (updmsr)
     558           0 :                 wrmsrl(MSR_IA32_SPEC_CTRL, msr);
     559             : }
     560             : 
     561           0 : static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
     562             : {
     563           0 :         if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
     564           0 :                 if (task_spec_ssb_disable(tsk))
     565           0 :                         set_tsk_thread_flag(tsk, TIF_SSBD);
     566             :                 else
     567           0 :                         clear_tsk_thread_flag(tsk, TIF_SSBD);
     568             : 
     569           0 :                 if (task_spec_ib_disable(tsk))
     570           0 :                         set_tsk_thread_flag(tsk, TIF_SPEC_IB);
     571             :                 else
     572           0 :                         clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
     573             :         }
      574             :         /* Return the updated threadinfo flags */
     575           0 :         return task_thread_info(tsk)->flags;
     576             : }
     577             : 
     578           0 : void speculation_ctrl_update(unsigned long tif)
     579             : {
     580           0 :         unsigned long flags;
     581             : 
     582             :         /* Forced update. Make sure all relevant TIF flags are different */
     583           0 :         local_irq_save(flags);
     584           0 :         __speculation_ctrl_update(~tif, tif);
     585           0 :         local_irq_restore(flags);
     586           0 : }
     587             : 
     588             : /* Called from seccomp/prctl update */
     589           0 : void speculation_ctrl_update_current(void)
     590             : {
     591           0 :         preempt_disable();
     592           0 :         speculation_ctrl_update(speculation_ctrl_update_tif(current));
     593           0 :         preempt_enable();
     594           0 : }
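
These helpers are reached from the PR_GET_SPECULATION_CTRL/PR_SET_SPECULATION_CTRL prctl() path
(and from seccomp). A hedged userspace sketch that queries and then forces the SSB mitigation for
the calling task:

	#include <stdio.h>
	#include <sys/prctl.h>

	static void show(const char *name, unsigned long feature)
	{
		long state = prctl(PR_GET_SPECULATION_CTRL, feature, 0, 0, 0);

		printf("%-16s state %#lx%s\n", name, state,
		       (state & PR_SPEC_DISABLE) ? " (speculation disabled)" : "");
	}

	int main(void)
	{
		show("store bypass", PR_SPEC_STORE_BYPASS);
		show("indirect branch", PR_SPEC_INDIRECT_BRANCH);

		/* Force SSBD on: the kernel sets TIF_SSBD and calls
		 * speculation_ctrl_update_current() above to reprogram the MSRs. */
		if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			  PR_SPEC_DISABLE, 0, 0) != 0)
			perror("PR_SET_SPECULATION_CTRL");
		return 0;
	}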
     595             : 
     596           0 : static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
     597             : {
     598           0 :         unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
     599             : 
     600           0 :         newval = cr4 ^ mask;
     601           0 :         if (newval != cr4) {
     602           0 :                 this_cpu_write(cpu_tlbstate.cr4, newval);
     603           0 :                 __write_cr4(newval);
     604             :         }
     605             : }
     606             : 
     607           0 : void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
     608             : {
     609           0 :         unsigned long tifp, tifn;
     610             : 
     611           0 :         tifn = READ_ONCE(task_thread_info(next_p)->flags);
     612           0 :         tifp = READ_ONCE(task_thread_info(prev_p)->flags);
     613             : 
     614           0 :         switch_to_bitmap(tifp);
     615             : 
     616           0 :         propagate_user_return_notify(prev_p, next_p);
     617             : 
     618           0 :         if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
     619             :             arch_has_block_step()) {
     620           0 :                 unsigned long debugctl, msk;
     621             : 
     622           0 :                 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
     623           0 :                 debugctl &= ~DEBUGCTLMSR_BTF;
     624           0 :                 msk = tifn & _TIF_BLOCKSTEP;
     625           0 :                 debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
     626           0 :                 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
     627             :         }
     628             : 
     629           0 :         if ((tifp ^ tifn) & _TIF_NOTSC)
     630           0 :                 cr4_toggle_bits_irqsoff(X86_CR4_TSD);
     631             : 
     632           0 :         if ((tifp ^ tifn) & _TIF_NOCPUID)
     633           0 :                 set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
     634             : 
     635           0 :         if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
     636           0 :                 __speculation_ctrl_update(tifp, tifn);
     637             :         } else {
     638           0 :                 speculation_ctrl_update_tif(prev_p);
     639           0 :                 tifn = speculation_ctrl_update_tif(next_p);
     640             : 
     641             :                 /* Enforce MSR update to ensure consistent state */
     642           0 :                 __speculation_ctrl_update(~tifn, tifn);
     643             :         }
     644             : 
     645           0 :         if ((tifp ^ tifn) & _TIF_SLD)
     646           0 :                 switch_to_sld(tifn);
     647           0 : }
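
The DEBUGCTL update above relocates the block-step bit from the thread-flags word into the MSR
image with a pair of shifts. A small stand-alone sketch of that bit relocation, using stand-in
values for TIF_BLOCKSTEP and DEBUGCTLMSR_BTF_SHIFT (the real ones come from kernel headers):

	#include <stdint.h>
	#include <stdio.h>

	#define TIF_BLOCKSTEP		25
	#define _TIF_BLOCKSTEP		(1UL << TIF_BLOCKSTEP)
	#define DEBUGCTLMSR_BTF_SHIFT	1

	int main(void)
	{
		uint64_t tifn = _TIF_BLOCKSTEP;		/* next task has block-stepping on */
		uint64_t msk = tifn & _TIF_BLOCKSTEP;	/* isolate the flag bit */
		uint64_t debugctl = 0;

		/* Shift the bit down to position 0, then up to the BTF bit of DEBUGCTL. */
		debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
		printf("DEBUGCTL image: %#llx\n", (unsigned long long)debugctl);
		return 0;
	}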
     648             : 
     649             : /*
     650             :  * Idle related variables and functions
     651             :  */
     652             : unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
     653             : EXPORT_SYMBOL(boot_option_idle_override);
     654             : 
     655             : static void (*x86_idle)(void);
     656             : 
     657             : #ifndef CONFIG_SMP
     658             : static inline void play_dead(void)
     659             : {
     660             :         BUG();
     661             : }
     662             : #endif
     663             : 
     664       51710 : void arch_cpu_idle_enter(void)
     665             : {
     666       51710 :         tsc_verify_tsc_adjust(false);
     667       51782 :         local_touch_nmi();
     668       51681 : }
     669             : 
     670           0 : void arch_cpu_idle_dead(void)
     671             : {
     672           0 :         play_dead();
     673           0 : }
     674             : 
     675             : /*
     676             :  * Called from the generic idle code.
     677             :  */
     678       51811 : void arch_cpu_idle(void)
     679             : {
     680       51811 :         x86_idle();
     681       51472 : }
     682             : 
     683             : /*
      684             :  * We use this if we don't have any better idle routine.
     685             :  */
     686       51730 : void __cpuidle default_idle(void)
     687             : {
     688       51730 :         raw_safe_halt();
     689       51685 : }
     690             : #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
     691             : EXPORT_SYMBOL(default_idle);
     692             : #endif
     693             : 
     694             : #ifdef CONFIG_XEN
     695             : bool xen_set_default_idle(void)
     696             : {
     697             :         bool ret = !!x86_idle;
     698             : 
     699             :         x86_idle = default_idle;
     700             : 
     701             :         return ret;
     702             : }
     703             : #endif
     704             : 
     705           0 : void stop_this_cpu(void *dummy)
     706             : {
     707           0 :         local_irq_disable();
     708             :         /*
     709             :          * Remove this CPU:
     710             :          */
     711           0 :         set_cpu_online(smp_processor_id(), false);
     712           0 :         disable_local_APIC();
     713           0 :         mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
     714             : 
     715             :         /*
     716             :          * Use wbinvd on processors that support SME. This provides support
     717             :          * for performing a successful kexec when going from SME inactive
     718             :          * to SME active (or vice-versa). The cache must be cleared so that
     719             :          * if there are entries with the same physical address, both with and
     720             :          * without the encryption bit, they don't race each other when flushed
     721             :          * and potentially end up with the wrong entry being committed to
     722             :          * memory.
     723             :          */
     724           0 :         if (boot_cpu_has(X86_FEATURE_SME))
     725           0 :                 native_wbinvd();
     726           0 :         for (;;) {
     727             :                 /*
     728             :                  * Use native_halt() so that memory contents don't change
     729             :                  * (stack usage and variables) after possibly issuing the
     730             :                  * native_wbinvd() above.
     731             :                  */
     732           0 :                 native_halt();
     733             :         }
     734             : }
     735             : 
     736             : /*
     737             :  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
     738             :  * states (local apic timer and TSC stop).
     739             :  *
     740             :  * XXX this function is completely buggered vs RCU and tracing.
     741             :  */
     742           0 : static void amd_e400_idle(void)
     743             : {
     744             :         /*
     745             :          * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
     746             :          * gets set after static_cpu_has() places have been converted via
     747             :          * alternatives.
     748             :          */
     749           0 :         if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
     750           0 :                 default_idle();
     751           0 :                 return;
     752             :         }
     753             : 
     754           0 :         tick_broadcast_enter();
     755             : 
     756           0 :         default_idle();
     757             : 
     758             :         /*
     759             :          * The switch back from broadcast mode needs to be called with
     760             :          * interrupts disabled.
     761             :          */
     762           0 :         raw_local_irq_disable();
     763           0 :         tick_broadcast_exit();
     764           0 :         raw_local_irq_enable();
     765             : }
     766             : 
     767             : /*
     768             :  * Intel Core2 and older machines prefer MWAIT over HALT for C1.
     769             :  * We can't rely on cpuidle installing MWAIT, because it will not load
     770             :  * on systems that support only C1 -- so the boot default must be MWAIT.
     771             :  *
     772             :  * Some AMD machines are the opposite, they depend on using HALT.
     773             :  *
     774             :  * So for default C1, which is used during boot until cpuidle loads,
     775             :  * use MWAIT-C1 on Intel HW that has it, else use HALT.
     776             :  */
     777           1 : static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
     778             : {
     779           1 :         if (c->x86_vendor != X86_VENDOR_INTEL)
     780             :                 return 0;
     781             : 
     782           1 :         if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
     783           1 :                 return 0;
     784             : 
     785             :         return 1;
     786             : }
     787             : 
     788             : /*
     789             :  * MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT
     790             :  * with interrupts enabled and no flags, which is backwards compatible with the
     791             :  * original MWAIT implementation.
     792             :  */
     793           0 : static __cpuidle void mwait_idle(void)
     794             : {
     795           0 :         if (!current_set_polling_and_test()) {
     796           0 :                 if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
     797           0 :                         mb(); /* quirk */
     798           0 :                         clflush((void *)&current_thread_info()->flags);
     799           0 :                         mb(); /* quirk */
     800             :                 }
     801             : 
     802           0 :                 __monitor((void *)&current_thread_info()->flags, 0, 0);
     803           0 :                 if (!need_resched())
     804           0 :                         __sti_mwait(0, 0);
     805             :                 else
     806           0 :                         raw_local_irq_enable();
     807             :         } else {
     808           0 :                 raw_local_irq_enable();
     809             :         }
     810           0 :         __current_clr_polling();
     811           0 : }
     812             : 
     813           4 : void select_idle_routine(const struct cpuinfo_x86 *c)
     814             : {
     815             : #ifdef CONFIG_SMP
     816           4 :         if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
     817           0 :                 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
     818             : #endif
     819           4 :         if (x86_idle || boot_option_idle_override == IDLE_POLL)
     820             :                 return;
     821             : 
     822           1 :         if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
     823           0 :                 pr_info("using AMD E400 aware idle routine\n");
     824           0 :                 x86_idle = amd_e400_idle;
     825           1 :         } else if (prefer_mwait_c1_over_halt(c)) {
     826           0 :                 pr_info("using mwait in idle threads\n");
     827           0 :                 x86_idle = mwait_idle;
     828             :         } else
     829           1 :                 x86_idle = default_idle;
     830             : }
     831             : 
     832           4 : void amd_e400_c1e_apic_setup(void)
     833             : {
     834           4 :         if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
     835           0 :                 pr_info("Switch to broadcast mode on CPU%d\n", smp_processor_id());
     836           0 :                 local_irq_disable();
     837           0 :                 tick_broadcast_force();
     838           0 :                 local_irq_enable();
     839             :         }
     840           4 : }
     841             : 
     842           1 : void __init arch_post_acpi_subsys_init(void)
     843             : {
     844           1 :         u32 lo, hi;
     845             : 
     846           1 :         if (!boot_cpu_has_bug(X86_BUG_AMD_E400))
     847             :                 return;
     848             : 
     849             :         /*
     850             :          * AMD E400 detection needs to happen after ACPI has been enabled. If
     851             :          * the machine is affected K8_INTP_C1E_ACTIVE_MASK bits are set in
     852             :          * MSR_K8_INT_PENDING_MSG.
     853             :          */
     854           0 :         rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
     855           0 :         if (!(lo & K8_INTP_C1E_ACTIVE_MASK))
     856             :                 return;
     857             : 
     858           0 :         boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E);
     859             : 
     860           0 :         if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
     861           0 :                 mark_tsc_unstable("TSC halt in AMD C1E");
     862           0 :         pr_info("System has AMD C1E enabled\n");
     863             : }
     864             : 
     865           0 : static int __init idle_setup(char *str)
     866             : {
     867           0 :         if (!str)
     868             :                 return -EINVAL;
     869             : 
     870           0 :         if (!strcmp(str, "poll")) {
     871           0 :                 pr_info("using polling idle threads\n");
     872           0 :                 boot_option_idle_override = IDLE_POLL;
     873           0 :                 cpu_idle_poll_ctrl(true);
     874           0 :         } else if (!strcmp(str, "halt")) {
     875             :                 /*
     876             :                  * When the boot option of idle=halt is added, halt is
     877             :                  * forced to be used for CPU idle. In such case CPU C2/C3
     878             :                  * won't be used again.
     879             :                  * To continue to load the CPU idle driver, don't touch
     880             :                  * the boot_option_idle_override.
     881             :                  */
     882           0 :                 x86_idle = default_idle;
     883           0 :                 boot_option_idle_override = IDLE_HALT;
     884           0 :         } else if (!strcmp(str, "nomwait")) {
     885             :                 /*
     886             :                  * If the boot option of "idle=nomwait" is added,
     887             :                  * it means that mwait will be disabled for CPU C2/C3
     888             :                  * states. In such case it won't touch the variable
     889             :                  * of boot_option_idle_override.
     890             :                  */
     891           0 :                 boot_option_idle_override = IDLE_NOMWAIT;
     892             :         } else
     893             :                 return -1;
     894             : 
     895             :         return 0;
     896             : }
     897             : early_param("idle", idle_setup);
     898             : 
     899        4502 : unsigned long arch_align_stack(unsigned long sp)
     900             : {
     901        4502 :         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
     902        4502 :                 sp -= get_random_int() % 8192;
     903        4502 :         return sp & ~0xf;
     904             : }
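
arch_align_stack() adds up to 8 KiB of downward jitter and then rounds down to the 16-byte
boundary expected on x86-64. A standalone restatement of the arithmetic (the helper name is
hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t align_stack_like_x86(uint64_t sp, uint32_t rnd)
	{
		sp -= rnd % 8192;		/* up to 8 KiB of downward jitter */
		return sp & ~UINT64_C(0xf);	/* round down to a 16-byte boundary */
	}

	int main(void)
	{
		printf("%#llx\n",
		       (unsigned long long)align_stack_like_x86(0x7ffffffff000ULL, 12345));
		return 0;
	}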
     905             : 
     906        2251 : unsigned long arch_randomize_brk(struct mm_struct *mm)
     907             : {
     908        2251 :         return randomize_page(mm->brk, 0x02000000);
     909             : }
     910             : 
     911             : /*
     912             :  * Called from fs/proc with a reference on @p to find the function
     913             :  * which called into schedule(). This needs to be done carefully
     914             :  * because the task might wake up and we might look at a stack
     915             :  * changing under us.
     916             :  */
     917         141 : unsigned long get_wchan(struct task_struct *p)
     918             : {
     919         141 :         unsigned long start, bottom, top, sp, fp, ip, ret = 0;
     920         141 :         int count = 0;
     921             : 
     922         141 :         if (p == current || p->state == TASK_RUNNING)
     923             :                 return 0;
     924             : 
     925          54 :         if (!try_get_task_stack(p))
     926             :                 return 0;
     927             : 
     928          54 :         start = (unsigned long)task_stack_page(p);
     929          54 :         if (!start)
     930           0 :                 goto out;
     931             : 
     932             :         /*
     933             :          * Layout of the stack page:
     934             :          *
     935             :          * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
     936             :          * PADDING
     937             :          * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
     938             :          * stack
     939             :          * ----------- bottom = start
     940             :          *
      941             :          * The task's stack pointer points at the location where the
      942             :          * frame pointer is stored. The data on the stack is:
     943             :          * ... IP FP ... IP FP
     944             :          *
     945             :          * We need to read FP and IP, so we need to adjust the upper
     946             :          * bound by another unsigned long.
     947             :          */
     948          54 :         top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
     949          54 :         top -= 2 * sizeof(unsigned long);
     950          54 :         bottom = start;
     951             : 
     952          54 :         sp = READ_ONCE(p->thread.sp);
     953          54 :         if (sp < bottom || sp > top)
     954           0 :                 goto out;
     955             : 
     956          54 :         fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
     957         119 :         do {
     958         119 :                 if (fp < bottom || fp > top)
     959           2 :                         goto out;
     960         117 :                 ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
     961         117 :                 if (!in_sched_functions(ip)) {
     962          52 :                         ret = ip;
     963          52 :                         goto out;
     964             :                 }
     965          65 :                 fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
     966          65 :         } while (count++ < 16 && p->state != TASK_RUNNING);
     967             : 
     968           0 : out:
     969          54 :         put_task_stack(p);
     970          54 :         return ret;
     971             : }
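
get_wchan() is what /proc/<pid>/wchan reports: for a sleeping task, the first return address on its
stack that is not inside scheduler code. A small userspace sketch that reads it (pid 1 is picked
arbitrarily as an example):

	#include <stdio.h>

	int main(void)
	{
		char sym[128] = "";
		FILE *f = fopen("/proc/1/wchan", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		/* A sleeping task shows the blocking symbol; a running one shows "0". */
		if (fscanf(f, "%127s", sym) == 1)
			printf("pid 1 is waiting in: %s\n", sym);
		fclose(f);
		return 0;
	}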
     972             : 
     973           0 : long do_arch_prctl_common(struct task_struct *task, int option,
     974             :                           unsigned long cpuid_enabled)
     975             : {
     976           0 :         switch (option) {
     977           0 :         case ARCH_GET_CPUID:
     978           0 :                 return get_cpuid_mode();
     979           0 :         case ARCH_SET_CPUID:
     980           0 :                 return set_cpuid_mode(task, cpuid_enabled);
     981             :         }
     982             : 
     983             :         return -EINVAL;
     984             : }
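
ARCH_GET_CPUID and ARCH_SET_CPUID are reached via the arch_prctl(2) syscall, which has no glibc
wrapper. A hedged userspace sketch for x86-64, assuming the constants come from <asm/prctl.h>:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/prctl.h>		/* ARCH_GET_CPUID / ARCH_SET_CPUID */

	int main(void)
	{
		/* 1 = CPUID allowed, 0 = CPUID raises SIGSEGV for this task. */
		long enabled = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0L);

		printf("CPUID currently %s\n", enabled ? "enabled" : "faulting");

		/* Turn CPUID faulting on; dispatched through do_arch_prctl_common()
		 * and set_cpuid_mode() above, fails with ENODEV without hardware support. */
		if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0L) != 0)
			perror("ARCH_SET_CPUID");
		return 0;
	}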

Generated by: LCOV version 1.14