LCOV - code coverage report
Current view: top level - include/linux - entry-common.h (source / functions) Hit Total Coverage
Test: landlock.info Lines: 7 12 58.3 %
Date: 2021-04-22 12:43:58 Functions: 1 1 100.0 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : #ifndef __LINUX_ENTRYCOMMON_H
       3             : #define __LINUX_ENTRYCOMMON_H
       4             : 
       5             : #include <linux/static_call_types.h>
       6             : #include <linux/tracehook.h>
       7             : #include <linux/syscalls.h>
       8             : #include <linux/seccomp.h>
       9             : #include <linux/sched.h>
      10             : 
      11             : #include <asm/entry-common.h>
      12             : 
      13             : /*
      14             :  * Define dummy _TIF work flags if not defined by the architecture or for
      15             :  * disabled functionality.
      16             :  */
      17             : #ifndef _TIF_PATCH_PENDING
      18             : # define _TIF_PATCH_PENDING             (0)
      19             : #endif
      20             : 
      21             : #ifndef _TIF_UPROBE
      22             : # define _TIF_UPROBE                    (0)
      23             : #endif
      24             : 
      25             : /*
      26             :  * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
      27             :  */
      28             : #ifndef ARCH_SYSCALL_WORK_ENTER
      29             : # define ARCH_SYSCALL_WORK_ENTER        (0)
      30             : #endif
      31             : 
      32             : /*
      33             :  * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
      34             :  */
      35             : #ifndef ARCH_SYSCALL_WORK_EXIT
      36             : # define ARCH_SYSCALL_WORK_EXIT         (0)
      37             : #endif
      38             : 
      39             : #define SYSCALL_WORK_ENTER      (SYSCALL_WORK_SECCOMP |                 \
      40             :                                  SYSCALL_WORK_SYSCALL_TRACEPOINT |      \
      41             :                                  SYSCALL_WORK_SYSCALL_TRACE |           \
      42             :                                  SYSCALL_WORK_SYSCALL_EMU |             \
      43             :                                  SYSCALL_WORK_SYSCALL_AUDIT |           \
      44             :                                  SYSCALL_WORK_SYSCALL_USER_DISPATCH |   \
      45             :                                  ARCH_SYSCALL_WORK_ENTER)
      46             : #define SYSCALL_WORK_EXIT       (SYSCALL_WORK_SYSCALL_TRACEPOINT |      \
      47             :                                  SYSCALL_WORK_SYSCALL_TRACE |           \
      48             :                                  SYSCALL_WORK_SYSCALL_AUDIT |           \
      49             :                                  SYSCALL_WORK_SYSCALL_USER_DISPATCH |   \
      50             :                                  SYSCALL_WORK_SYSCALL_EXIT_TRAP |       \
      51             :                                  ARCH_SYSCALL_WORK_EXIT)
      52             : 
      53             : /*
      54             :  * TIF flags handled in exit_to_user_mode_loop()
      55             :  */
      56             : #ifndef ARCH_EXIT_TO_USER_MODE_WORK
      57             : # define ARCH_EXIT_TO_USER_MODE_WORK            (0)
      58             : #endif
      59             : 
      60             : #define EXIT_TO_USER_MODE_WORK                                          \
      61             :         (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |           \
      62             :          _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |  \
      63             :          ARCH_EXIT_TO_USER_MODE_WORK)
      64             : 
      65             : /**
      66             :  * arch_check_user_regs - Architecture specific sanity check for user mode regs
      67             :  * @regs:       Pointer to current's pt_regs
      68             :  *
      69             :  * Defaults to an empty implementation. An architecture overrides it by
      70             :  * defining a macro named arch_check_user_regs ahead of the #ifndef below.
      71             :  *
      72             :  * Invoked from syscall_enter_from_user_mode() in the non-instrumentable
      73             :  * section. Use __always_inline so the compiler cannot push it out of line
      74             :  * and make it instrumentable.
      75             :  */
      76             : static __always_inline void arch_check_user_regs(struct pt_regs *regs);
      77             : 
      78             : #ifndef arch_check_user_regs
      79             : static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
      80             : #endif
      81             : 
      82             : /**
      83             :  * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry()
      84             :  * @regs:       Pointer to current's pt_regs
      85             :  *
      86             :  * Returns: 0 on success or an error code to skip the syscall.
      87             :  *
      88             :  * Defaults to tracehook_report_syscall_entry(). An architecture overrides it
      89             :  * by defining a macro of the same name ahead of the #ifndef below.
      90             :  *
      91             :  * Invoked from syscall_enter_from_user_mode()
      92             :  */
      93             : static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs);
      94             : 
      95             : #ifndef arch_syscall_enter_tracehook
      96           0 : static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs)
      97             : {
      98           0 :         return tracehook_report_syscall_entry(regs);
      99             : }
     100             : #endif
     101             : 
     102             : /**
     103             :  * enter_from_user_mode - Establish state when coming from user mode
     104             :  *
     105             :  * Syscall/interrupt entry disables interrupts, but user mode is traced as
     106             :  * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
     107             :  *
     108             :  * 1) Tell lockdep that interrupts are disabled
     109             :  * 2) Invoke context tracking if enabled to reactivate RCU
     110             :  * 3) Trace interrupts off state
     111             :  *
     112             :  * Invoked from architecture specific syscall entry code with interrupts
     113             :  * disabled. The calling code has to be non-instrumentable. When the
     114             :  * function returns all state is correct and interrupts are still
     115             :  * disabled. The subsequent functions can be instrumented.
     116             :  *
     117             :  * This is invoked when there is architecture specific functionality to be
     118             :  * done between establishing state and enabling interrupts. The caller must
     119             :  * enable interrupts before invoking syscall_enter_from_user_mode_work().
     120             :  */
     121             : void enter_from_user_mode(struct pt_regs *regs);
     122             : 
     123             : /**
     124             :  * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
     125             :  * @regs:       Pointer to current's pt_regs
     126             :  *
     127             :  * Invoked from architecture specific syscall entry code with interrupts
     128             :  * disabled. The calling code has to be non-instrumentable. When the
     129             :  * function returns all state is correct, interrupts are enabled and the
     130             :  * subsequent functions can be instrumented.
     131             :  *
     132             :  * This handles lockdep, RCU (context tracking) and tracing state, i.e.
     133             :  * the functionality provided by enter_from_user_mode().
     134             :  *
     135             :  * This is invoked when there is extra architecture specific functionality
     136             :  * to be done between establishing state and handling user mode entry work.
     137             :  */
     138             : void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
     139             : 
     140             : /**
     141             :  * syscall_enter_from_user_mode_work - Check and handle work before invoking
     142             :  *                                     a syscall
     143             :  * @regs:       Pointer to current's pt_regs
     144             :  * @syscall:    The syscall number
     145             :  *
     146             :  * Invoked from architecture specific syscall entry code with interrupts
     147             :  * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
     148             :  * architecture specific work.
     149             :  *
     150             :  * Returns: The original or a modified syscall number
     151             :  *
     152             :  * If the returned syscall number is -1 then the syscall should be
     153             :  * skipped. In this case the caller may invoke syscall_set_error() or
     154             :  * syscall_set_return_value() first.  If neither of those are called and -1
     155             :  * is returned, then the syscall will fail with ENOSYS.
     156             :  *
     157             :  * It handles the following work items:
     158             :  *
     159             :  *  1) syscall_work flag dependent invocations of
     160             :  *     arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter()
     161             :  *  2) Invocation of audit_syscall_entry()
     162             :  */
     163             : long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
     164             : 
     165             : /**
     166             :  * syscall_enter_from_user_mode - Establish state and check and handle work
     167             :  *                                before invoking a syscall
     168             :  * @regs:       Pointer to current's pt_regs
     169             :  * @syscall:    The syscall number
     170             :  *
     171             :  * Invoked from architecture specific syscall entry code with interrupts
     172             :  * disabled. The calling code has to be non-instrumentable. When the
     173             :  * function returns all state is correct, interrupts are enabled and the
     174             :  * subsequent functions can be instrumented.
     175             :  *
     176             :  * This is a combination of syscall_enter_from_user_mode_prepare() and
     177             :  * syscall_enter_from_user_mode_work().
     178             :  *
     179             :  * Returns: The original or a modified syscall number. See
     180             :  * syscall_enter_from_user_mode_work() for further explanation.
     181             :  */
     182             : long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
     183             : 
     184             : /**
     185             :  * local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
     186             :  * @ti_work:    Cached TIF flags gathered with interrupts disabled
     187             :  *
     188             :  * Defaults to local_irq_enable(), ignoring @ti_work. An architecture overrides
     189             :  * it by defining a macro of the same name ahead of the #ifndef below.
     190             :  */
     191             : static inline void local_irq_enable_exit_to_user(unsigned long ti_work);
     192             : 
     193             : #ifndef local_irq_enable_exit_to_user
     194       54909 : static inline void local_irq_enable_exit_to_user(unsigned long ti_work)
     195             : {
     196       54909 :         local_irq_enable();
     197             : }
     198             : #endif
     199             : 
     200             : /**
     201             :  * local_irq_disable_exit_to_user - Exit to user variant of local_irq_disable()
     202             :  *
     203             :  * Defaults to local_irq_disable(). An architecture overrides it by defining
     204             :  * a macro of the same name ahead of the #ifndef below.
     205             :  */
     206             : static inline void local_irq_disable_exit_to_user(void);
     207             : 
     208             : #ifndef local_irq_disable_exit_to_user
     209      350885 : static inline void local_irq_disable_exit_to_user(void)
     210             : {
     211      350885 :         local_irq_disable();
     212      350836 : }
     213             : #endif
     214             : 
     215             : /**
     216             :  * arch_exit_to_user_mode_work - Architecture specific TIF work for exit
     217             :  *                               to user mode.
     218             :  * @regs:       Pointer to current's pt_regs
     219             :  * @ti_work:    Cached TIF flags gathered with interrupts disabled
     220             :  *
     221             :  * Invoked from exit_to_user_mode_loop() with interrupts enabled
     222             :  *
     223             :  * Defaults to a NOOP. Overridable via a same-named macro (see #ifndef below).
     224             :  */
     225             : static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
     226             :                                                unsigned long ti_work);
     227             : 
     228             : #ifndef arch_exit_to_user_mode_work
     229       54912 : static inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
     230             :                                                unsigned long ti_work)
     231             : {
     232       54912 : }
     233             : #endif
     234             : 
     235             : /**
     236             :  * arch_exit_to_user_mode_prepare - Architecture specific preparation for
     237             :  *                                  exit to user mode.
     238             :  * @regs:       Pointer to current's pt_regs
     239             :  * @ti_work:    Cached TIF flags gathered with interrupts disabled
     240             :  *
     241             :  * Invoked from exit_to_user_mode_prepare() with interrupts disabled as the last
     242             :  * function before return. Defaults to a NOOP unless a same-named macro exists.
     243             :  */
     244             : static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
     245             :                                                   unsigned long ti_work);
     246             : 
     247             : #ifndef arch_exit_to_user_mode_prepare
     248             : static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
     249             :                                                   unsigned long ti_work)
     250             : {
     251             : }
     252             : #endif
     253             : 
     254             : /**
     255             :  * arch_exit_to_user_mode - Architecture specific final work before
     256             :  *                          exit to user mode.
     257             :  *
     258             :  * Invoked from exit_to_user_mode() with interrupts disabled as the last
     259             :  * function before return. Defaults to a NOOP unless a same-named macro exists.
     260             :  *
     261             :  * This needs to be __always_inline because it is non-instrumentable code
     262             :  * invoked after context tracking switched to user mode.
     263             :  *
     264             :  * An architecture implementation must not do anything complex, no locking
     265             :  * etc. The main purpose is for speculation mitigations.
     266             :  */
     267             : static __always_inline void arch_exit_to_user_mode(void);
     268             : 
     269             : #ifndef arch_exit_to_user_mode
     270             : static __always_inline void arch_exit_to_user_mode(void) { }
     271             : #endif
     272             : 
     273             : /**
     274             :  * arch_do_signal_or_restart -  Architecture specific signal delivery function
     275             :  * @regs:       Pointer to current's pt_regs
     276             :  * @has_signal: actual signal to handle
     277             :  *
     278             :  * Invoked from exit_to_user_mode_loop().
     279             :  */
     280             : void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);
     281             : 
     282             : /**
     283             :  * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
     284             :  * @regs:       Pointer to current's pt_regs
     285             :  * @step:       Indicator for single step
     286             :  *
     287             :  * Defaults to tracehook_report_syscall_exit(). An architecture overrides it
     288             :  * by defining a macro of the same name ahead of the #ifndef below.
     289             :  *
     290             :  * Invoked from syscall_exit_to_user_mode()
     291             :  */
     292             : static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step);
     293             : 
     294             : #ifndef arch_syscall_exit_tracehook
     295           0 : static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
     296             : {
     297           0 :         tracehook_report_syscall_exit(regs, step);
     298           0 : }
     299             : #endif
     300             : 
     301             : /**
     302             :  * exit_to_user_mode - Fixup state when exiting to user mode
     303             :  *
     304             :  * Syscall/interrupt exit enables interrupts, but the kernel state is
     305             :  * interrupts disabled when this is invoked. Also tell RCU about it.
     306             :  *
     307             :  * 1) Trace interrupts on state
     308             :  * 2) Invoke context tracking if enabled to adjust RCU state
     309             :  * 3) Invoke architecture specific last minute exit code, e.g. speculation
     310             :  *    mitigations, etc.: arch_exit_to_user_mode()
     311             :  * 4) Tell lockdep that interrupts are enabled
     312             :  *
     313             :  * Invoked from architecture specific code when syscall_exit_to_user_mode()
     314             :  * is not suitable as the last step before returning to userspace. Must be
     315             :  * invoked with interrupts disabled and the caller must be
     316             :  * non-instrumentable.
     317             :  * The caller has to invoke syscall_exit_to_user_mode_work() before this.
     318             :  */
     319             : void exit_to_user_mode(void);
     320             : 
     321             : /**
     322             :  * syscall_exit_to_user_mode_work - Handle work before returning to user mode
     323             :  * @regs:       Pointer to current's pt_regs
     324             :  *
     325             :  * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
     326             :  * exit_to_user_mode() to perform the final transition to user mode.
     327             :  *
     328             :  * Calling convention is the same as for syscall_exit_to_user_mode() and it
     329             :  * returns with all work handled and interrupts disabled. The caller must
     330             :  * invoke exit_to_user_mode() before actually switching to user mode to
     331             :  * make the final state transitions. Interrupts must stay disabled between
     332             :  * return from this function and the invocation of exit_to_user_mode().
     333             :  */
     334             : void syscall_exit_to_user_mode_work(struct pt_regs *regs);
     335             : 
     336             : /**
     337             :  * syscall_exit_to_user_mode - Handle work before returning to user mode
     338             :  * @regs:       Pointer to current's pt_regs
     339             :  *
     340             :  * Invoked with interrupts enabled and fully valid regs. Returns with all
     341             :  * work handled, interrupts disabled such that the caller can immediately
     342             :  * switch to user mode. Called from architecture specific syscall and ret
     343             :  * from fork code.
     344             :  *
     345             :  * The call order is:
     346             :  *  1) One-time syscall exit work:
     347             :  *      - rseq syscall exit
     348             :  *      - audit
     349             :  *      - syscall tracing
     350             :  *      - tracehook (single stepping)
     351             :  *
     352             :  *  2) Preparatory work
     353             :  *      - Exit to user mode loop (common TIF handling). Invokes
     354             :  *        arch_exit_to_user_mode_work() for architecture specific TIF work
     355             :  *      - Architecture specific one time work arch_exit_to_user_mode_prepare()
     356             :  *      - Address limit and lockdep checks
     357             :  *
     358             :  *  3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
     359             :  *     functionality in exit_to_user_mode().
     360             :  *
     361             :  * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
     362             :  * exit_to_user_mode(). This function is preferred unless there is a
     363             :  * compelling architectural reason to use the separate functions.
     364             :  */
     365             : void syscall_exit_to_user_mode(struct pt_regs *regs);
     366             : 
     367             : /**
     368             :  * irqentry_enter_from_user_mode - Establish state before invoking the irq handler
     369             :  * @regs:       Pointer to current's pt_regs
     370             :  *
     371             :  * Invoked from architecture specific entry code with interrupts disabled.
     372             :  * Can only be called when the interrupt entry came from user mode. The
     373             :  * calling code must be non-instrumentable.  When the function returns all
     374             :  * state is correct and the subsequent functions can be instrumented.
     375             :  *
     376             :  * The function establishes state (lockdep, RCU (context tracking), tracing)
     377             :  */
     378             : void irqentry_enter_from_user_mode(struct pt_regs *regs);
     379             : 
     380             : /**
     381             :  * irqentry_exit_to_user_mode - Interrupt exit work
     382             :  * @regs:       Pointer to current's pt_regs
     383             :  *
     384             :  * Invoked with interrupts disabled and fully valid regs. Returns with all
     385             :  * work handled, interrupts disabled such that the caller can immediately
     386             :  * switch to user mode. Called from architecture specific interrupt
     387             :  * handling code.
     388             :  *
     389             :  * The call order is #2 and #3 as described in syscall_exit_to_user_mode().
     390             :  * Interrupt exit is not invoking #1 which is the syscall specific one time
     391             :  * work.
     392             :  */
     393             : void irqentry_exit_to_user_mode(struct pt_regs *regs);
     394             : 
     395             : #ifndef irqentry_state /* skipped when irqentry_state is already a macro */
     396             : /**
     397             :  * struct irqentry_state - Opaque object for exception state storage
     398             :  * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
     399             :  *            exit path has to invoke rcu_irq_exit().
     400             :  * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
     401             :  *           lockdep state is restored correctly on exit from nmi.
     402             :  *
     403             :  * This opaque object is filled in by the irqentry_*_enter() functions and
     404             :  * must be passed back into the corresponding irqentry_*_exit() functions
     405             :  * when the exception is complete.
     406             :  *
     407             :  * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
     408             :  * and all members private.  Descriptions of the members are provided to aid in
     409             :  * the maintenance of the irqentry_*() functions.
     410             :  */
     411             : typedef struct irqentry_state {
     412             :         union { /* both flags share the same storage */
     413             :                 bool    exit_rcu;
     414             :                 bool    lockdep;
     415             :         };
     416             : } irqentry_state_t;
     417             : #endif /* !irqentry_state */
     418             : 
     419             : /**
     420             :  * irqentry_enter - Handle state tracking on ordinary interrupt entries
     421             :  * @regs:       Pointer to pt_regs of interrupted context
     422             :  *
     423             :  * Invokes:
     424             :  *  - lockdep irqflag state tracking as low level ASM entry disabled
     425             :  *    interrupts.
     426             :  *
     427             :  *  - Context tracking if the exception hit user mode.
     428             :  *
     429             :  *  - The hardirq tracer to keep the state consistent as low level ASM
     430             :  *    entry disabled interrupts.
     431             :  *
     432             :  * As a precondition, this requires that the entry came from user mode,
     433             :  * idle, or a kernel context in which RCU is watching.
     434             :  *
     435             :  * For kernel mode entries RCU handling is done conditionally. If RCU is
     436             :  * watching then the only RCU requirement is to check whether the tick has
     437             :  * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
     438             :  * invoked on entry and rcu_irq_exit() on exit.
     439             :  *
     440             :  * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
     441             :  * solves the problem of kernel mode pagefaults which can schedule, which
     442             :  * is not possible after invoking rcu_irq_enter() without undoing it.
     443             :  *
     444             :  * For user mode entries irqentry_enter_from_user_mode() is invoked to
     445             :  * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
     446             :  * would not be possible.
     447             :  *
     448             :  * Returns: An opaque object that must be passed to irqentry_exit()
     449             :  */
     450             : irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
     451             : 
     452             : /**
     453             :  * irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
     454             :  *
     455             :  * Conditional reschedule with additional sanity checks.
     456             :  */
     457             : void irqentry_exit_cond_resched(void);
     458             : #ifdef CONFIG_PREEMPT_DYNAMIC
     459             : DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
     460             : #endif
     461             : 
     462             : /**
     463             :  * irqentry_exit - Handle return from exception that used irqentry_enter()
     464             :  * @regs:       Pointer to pt_regs (exception entry regs)
     465             :  * @state:      Return value from matching call to irqentry_enter()
     466             :  *
     467             :  * Depending on the return target (kernel/user) this runs the necessary
     468             :  * preemption and work checks if possible and required and returns to
     469             :  * the caller with interrupts disabled and no further work pending.
     470             :  *
     471             :  * This is the last action before returning to the low level ASM code which
     472             :  * just needs to return to the appropriate context.
     473             :  *
     474             :  * Counterpart to irqentry_enter().
     475             :  */
     476             : void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
     477             : 
     478             : /**
     479             :  * irqentry_nmi_enter - Handle NMI entry
     480             :  * @regs:       Pointer to current's pt_regs
     481             :  *
     482             :  * Similar to irqentry_enter() but taking care of the NMI constraints.
     483             :  */
     484             : irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
     485             : 
     486             : /**
     487             :  * irqentry_nmi_exit - Handle return from NMI handling
     488             :  * @regs:       Pointer to pt_regs (NMI entry regs)
     489             :  * @irq_state:  Return value from matching call to irqentry_nmi_enter()
     490             :  *
     491             :  * Last action before returning to the low level assembly code.
     492             :  *
     493             :  * Counterpart to irqentry_nmi_enter().
     494             :  */
     495             : void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
     496             : 
     497             : #endif

Generated by: LCOV version 1.14