// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/mm/oom_kill.c
 *
 * Copyright (C) 1998,2000 Rik van Riel
 * Thanks go out to Claus Fischer for some serious inspiration and
 * for goading me into coding this file...
 * Copyright (C) 2010 Google, Inc.
 * Rewritten by David Rientjes
 *
 * The routines in this file are used to kill a process when
 * we're seriously out of memory. This gets called from __alloc_pages()
 * in mm/page_alloc.c when we really run out of memory.
 *
 * Since we won't call these routines often (on a well-configured
 * machine) this file will double as a 'coding guide' and a signpost
 * for newbie kernel hackers. It features several pointers to major
 * kernel subsystems and hints as to where to find out what things do.
 */

#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
#include <linux/mmu_notifier.h>

#include <asm/tlb.h>
#include "internal.h"
#include "slab.h"

#define CREATE_TRACE_POINTS
#include <trace/events/oom.h>

int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
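
/*
 * These three knobs are exported through the vm sysctl table, so they can
 * be read and tuned from userspace. A minimal, illustrative shell session
 * (paths are the standard procfs ones):
 *
 *	# cat /proc/sys/vm/oom_dump_tasks
 *	1
 *	# echo 1 > /proc/sys/vm/panic_on_oom
 *	# echo 1 > /proc/sys/vm/oom_kill_allocating_task
 */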

/*
 * Serializes oom killer invocations (out_of_memory()) from all contexts to
 * prevent over-eager oom killing (e.g. when the oom killer is invoked
 * from different domains).
 *
 * oom_killer_disable() relies on this lock to stabilize oom_killer_disabled
 * and mark_oom_victim().
 */
DEFINE_MUTEX(oom_lock);
/* Serializes oom_score_adj and oom_score_adj_min updates */
DEFINE_MUTEX(oom_adj_mutex);

static inline bool is_memcg_oom(struct oom_control *oc)
{
	return oc->memcg != NULL;
}

#ifdef CONFIG_NUMA
/**
 * oom_cpuset_eligible() - check task eligibility for kill
 * @start: task struct of the task to consider
 * @oc: pointer to struct oom_control
 *
 * Task eligibility is determined by whether or not a candidate task, @start,
 * shares the same mempolicy nodes as current if it is bound by such a policy
 * and whether or not it has the same set of allowed cpuset nodes.
 *
 * This function assumes oom-killer context and that 'current' has triggered
 * the oom-killer.
 */
static bool oom_cpuset_eligible(struct task_struct *start,
				struct oom_control *oc)
{
	struct task_struct *tsk;
	bool ret = false;
	const nodemask_t *mask = oc->nodemask;

	if (is_memcg_oom(oc))
		return true;

	rcu_read_lock();
	for_each_thread(start, tsk) {
		if (mask) {
			/*
			 * If this is a mempolicy constrained oom, tsk's
			 * cpuset is irrelevant. Only return true if its
			 * mempolicy intersects current's, otherwise it may be
			 * needlessly killed.
			 */
			ret = mempolicy_nodemask_intersects(tsk, mask);
		} else {
			/*
			 * This is not a mempolicy constrained oom, so only
			 * check the mems of tsk's cpuset.
			 */
			ret = cpuset_mems_allowed_intersects(current, tsk);
		}
		if (ret)
			break;
	}
	rcu_read_unlock();

	return ret;
}
#else
static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc)
{
	return true;
}
#endif /* CONFIG_NUMA */

/*
 * The process p may have detached its own ->mm while exiting or through
 * kthread_use_mm(), but one or more of its subthreads may still have a valid
 * pointer. Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
	struct task_struct *t;

	rcu_read_lock();

	for_each_thread(p, t) {
		task_lock(t);
		if (likely(t->mm))
			goto found;
		task_unlock(t);
	}
	t = NULL;
found:
	rcu_read_unlock();

	return t;
}
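
/*
 * Illustrative usage sketch (not a function in this file): the thread
 * returned by find_lock_task_mm() is task_lock()ed, so every caller must
 * drop the lock itself once it is done with ->mm:
 *
 *	t = find_lock_task_mm(p);
 *	if (!t)
 *		return;		// no thread has a live ->mm
 *	... inspect t->mm while the pointer is stable ...
 *	task_unlock(t);
 *
 * oom_badness() and dump_task() below follow exactly this pattern.
 */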

/*
 * order == -1 means the oom kill is required by sysrq, otherwise only
 * for display purposes.
 */
static inline bool is_sysrq_oom(struct oom_control *oc)
{
	return oc->order == -1;
}

/* return true if the task is not adequate as candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p)
{
	if (is_global_init(p))
		return true;
	if (p->flags & PF_KTHREAD)
		return true;
	return false;
}

/*
 * Check whether the unreclaimable slab amount is greater than
 * all user memory (LRU pages).
 * dump_unreclaimable_slab() could help in the case that
 * the oom is due to too much unreclaimable slab used by the kernel.
 */
static bool should_dump_unreclaim_slab(void)
{
	unsigned long nr_lru;

	nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
		 global_node_page_state(NR_INACTIVE_ANON) +
		 global_node_page_state(NR_ACTIVE_FILE) +
		 global_node_page_state(NR_INACTIVE_FILE) +
		 global_node_page_state(NR_ISOLATED_ANON) +
		 global_node_page_state(NR_ISOLATED_FILE) +
		 global_node_page_state(NR_UNEVICTABLE);

	return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru);
}

/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of the task whose badness we should calculate
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible. The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
long oom_badness(struct task_struct *p, unsigned long totalpages)
{
	long points;
	long adj;

	if (oom_unkillable_task(p))
		return LONG_MIN;

	p = find_lock_task_mm(p);
	if (!p)
		return LONG_MIN;

	/*
	 * Do not even consider tasks which are explicitly marked oom
	 * unkillable, have already been oom reaped, or are in
	 * the middle of a vfork.
	 */
	adj = (long)p->signal->oom_score_adj;
	if (adj == OOM_SCORE_ADJ_MIN ||
			test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
			in_vfork(p)) {
		task_unlock(p);
		return LONG_MIN;
	}

	/*
	 * The baseline for the badness score is the proportion of RAM that each
	 * task's rss, pagetable and swap space use.
	 */
	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
		mm_pgtables_bytes(p->mm) / PAGE_SIZE;
	task_unlock(p);

	/* Normalize to oom_score_adj units */
	adj *= totalpages / 1000;
	points += adj;

	return points;
}
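
/*
 * Worked example with illustrative numbers: on a machine where
 * totalpages = 1048576 (4GiB of 4KiB pages), a task using 262144 pages of
 * rss + swap + page tables starts with 262144 points. An oom_score_adj of
 * -500 then subtracts 500 * (1048576 / 1000) = 524000 points (roughly half
 * of RAM), so such a task is only selected if it uses about half of RAM
 * more than its competitors. OOM_SCORE_ADJ_MIN (-1000) short-circuits
 * above and makes the task unkillable.
 */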

static const char * const oom_constraint_text[] = {
	[CONSTRAINT_NONE] = "CONSTRAINT_NONE",
	[CONSTRAINT_CPUSET] = "CONSTRAINT_CPUSET",
	[CONSTRAINT_MEMORY_POLICY] = "CONSTRAINT_MEMORY_POLICY",
	[CONSTRAINT_MEMCG] = "CONSTRAINT_MEMCG",
};

/*
 * Determine the type of allocation constraint.
 */
static enum oom_constraint constrained_alloc(struct oom_control *oc)
{
	struct zone *zone;
	struct zoneref *z;
	enum zone_type highest_zoneidx = gfp_zone(oc->gfp_mask);
	bool cpuset_limited = false;
	int nid;

	if (is_memcg_oom(oc)) {
		oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
		return CONSTRAINT_MEMCG;
	}

	/* Default to all available memory */
	oc->totalpages = totalram_pages() + total_swap_pages;

	if (!IS_ENABLED(CONFIG_NUMA))
		return CONSTRAINT_NONE;

	if (!oc->zonelist)
		return CONSTRAINT_NONE;
	/*
	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
	 * killing current; we have to kill a random task in this case.
	 * Hopefully, CONSTRAINT_THISNODE...but there is no way to handle it
	 * for now.
	 */
	if (oc->gfp_mask & __GFP_THISNODE)
		return CONSTRAINT_NONE;

	/*
	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
	 * the page allocator means a mempolicy is in effect. Cpuset policy
	 * is enforced in get_page_from_freelist().
	 */
	if (oc->nodemask &&
	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
		oc->totalpages = total_swap_pages;
		for_each_node_mask(nid, *oc->nodemask)
			oc->totalpages += node_present_pages(nid);
		return CONSTRAINT_MEMORY_POLICY;
	}

	/* Check whether this allocation failure was caused by cpuset's wall function */
	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
			highest_zoneidx, oc->nodemask)
		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
			cpuset_limited = true;

	if (cpuset_limited) {
		oc->totalpages = total_swap_pages;
		for_each_node_mask(nid, cpuset_current_mems_allowed)
			oc->totalpages += node_present_pages(nid);
		return CONSTRAINT_CPUSET;
	}
	return CONSTRAINT_NONE;
}

static int oom_evaluate_task(struct task_struct *task, void *arg)
{
	struct oom_control *oc = arg;
	long points;

	if (oom_unkillable_task(task))
		goto next;

	/* p may not have freeable memory in nodemask */
	if (!is_memcg_oom(oc) && !oom_cpuset_eligible(task, oc))
		goto next;

	/*
	 * This task already has access to memory reserves and is being killed.
	 * Don't allow any other task to have access to the reserves unless
	 * the task has MMF_OOM_SKIP because chances that it would release
	 * any memory are quite low.
	 */
	if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
		if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
			goto next;
		goto abort;
	}

	/*
	 * If task is allocating a lot of memory and has been marked to be
	 * killed first if it triggers an oom, then select it.
	 */
	if (oom_task_origin(task)) {
		points = LONG_MAX;
		goto select;
	}

	points = oom_badness(task, oc->totalpages);
	if (points == LONG_MIN || points < oc->chosen_points)
		goto next;

select:
	if (oc->chosen)
		put_task_struct(oc->chosen);
	get_task_struct(task);
	oc->chosen = task;
	oc->chosen_points = points;
next:
	return 0;
abort:
	if (oc->chosen)
		put_task_struct(oc->chosen);
	oc->chosen = (void *)-1UL;
	return 1;
}

/*
 * Simple selection loop. We choose the process with the highest number of
 * 'points'. In case scan was aborted, oc->chosen is set to -1.
 */
static void select_bad_process(struct oom_control *oc)
{
	oc->chosen_points = LONG_MIN;

	if (is_memcg_oom(oc))
		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
	else {
		struct task_struct *p;

		rcu_read_lock();
		for_each_process(p)
			if (oom_evaluate_task(p, oc))
				break;
		rcu_read_unlock();
	}
}

static int dump_task(struct task_struct *p, void *arg)
{
	struct oom_control *oc = arg;
	struct task_struct *task;

	if (oom_unkillable_task(p))
		return 0;

	/* p may not have freeable memory in nodemask */
	if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
		return 0;

	task = find_lock_task_mm(p);
	if (!task) {
		/*
		 * All of p's threads have already detached their mm's. There's
		 * no need to report them; they can't be oom killed anyway.
		 */
		return 0;
	}

	pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
		task->pid, from_kuid(&init_user_ns, task_uid(task)),
		task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
		mm_pgtables_bytes(task->mm),
		get_mm_counter(task->mm, MM_SWAPENTS),
		task->signal->oom_score_adj, task->comm);
	task_unlock(task);

	return 0;
}

/**
 * dump_tasks - dump current memory state of all system tasks
 * @oc: pointer to struct oom_control
 *
 * Dumps the current memory state of all eligible tasks. Tasks not in the same
 * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
 * are not shown.
 * State information includes task's pid, uid, tgid, vm size, rss,
 * pgtables_bytes, swapents, oom_score_adj value, and name.
 */
static void dump_tasks(struct oom_control *oc)
{
	pr_info("Tasks state (memory values in pages):\n");
	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");

	if (is_memcg_oom(oc))
		mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
	else {
		struct task_struct *p;

		rcu_read_lock();
		for_each_process(p)
			dump_task(p, oc);
		rcu_read_unlock();
	}
}

static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
{
	/* one line summary of the oom killer context. */
	pr_info("oom-kill:constraint=%s,nodemask=%*pbl",
			oom_constraint_text[oc->constraint],
			nodemask_pr_args(oc->nodemask));
	cpuset_print_current_mems_allowed();
	mem_cgroup_print_oom_context(oc->memcg, victim);
	pr_cont(",task=%s,pid=%d,uid=%d\n", victim->comm, victim->pid,
		from_kuid(&init_user_ns, task_uid(victim)));
}

static void dump_header(struct oom_control *oc, struct task_struct *p)
{
	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
		current->signal->oom_score_adj);
	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
		pr_warn("COMPACTION is disabled!!!\n");

	dump_stack();
	if (is_memcg_oom(oc))
		mem_cgroup_print_oom_meminfo(oc->memcg);
	else {
		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
		if (should_dump_unreclaim_slab())
			dump_unreclaimable_slab();
	}
	if (sysctl_oom_dump_tasks)
		dump_tasks(oc);
	if (p)
		dump_oom_summary(oc, p);
}

/*
 * Number of OOM victims in flight
 */
static atomic_t oom_victims = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);

static bool oom_killer_disabled __read_mostly;

#define K(x) ((x) << (PAGE_SHIFT-10))
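
/*
 * K() converts a page count into kB for the messages below: with 4KiB
 * pages (PAGE_SHIFT == 12) it shifts left by 2, e.g. K(25600) == 102400kB.
 * Using a shift keeps it correct for any page size without a division.
 */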

/*
 * task->mm can be NULL if the task is the exited group leader. So to
 * determine whether the task is using a particular mm, we examine all the
 * task's threads: if one of those is using this mm then this task was also
 * using it.
 */
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
{
	struct task_struct *t;

	for_each_thread(p, t) {
		struct mm_struct *t_mm = READ_ONCE(t->mm);
		if (t_mm)
			return t_mm == mm;
	}
	return false;
}

#ifdef CONFIG_MMU
/*
 * OOM Reaper kernel thread which tries to reap the memory used by the OOM
 * victim (if that is possible) to help the OOM killer to move on.
 */
static struct task_struct *oom_reaper_th;
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);

bool __oom_reap_task_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	bool ret = true;

	/*
	 * Tell all users of get_user/copy_from_user etc... that the content
	 * is no longer stable. No barriers really needed because unmapping
	 * should imply barriers already and the reader would hit a page fault
	 * if it stumbled over reaped memory.
	 */
	set_bit(MMF_UNSTABLE, &mm->flags);

	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
		if (!can_madv_lru_vma(vma))
			continue;

		/*
		 * Only anonymous pages have a good chance to be dropped
		 * without additional steps which we cannot afford as we
		 * are OOM already.
		 *
		 * We do not even care about fs backed pages because all
		 * which are reclaimable have already been reclaimed and
		 * we do not want to block exit_mmap by keeping mm ref
		 * count elevated without a good reason.
		 */
		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
			struct mmu_notifier_range range;
			struct mmu_gather tlb;

			mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
						vma, mm, vma->vm_start,
						vma->vm_end);
			tlb_gather_mmu(&tlb, mm);
			if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
				tlb_finish_mmu(&tlb);
				ret = false;
				continue;
			}
			unmap_page_range(&tlb, vma, range.start, range.end, NULL);
			mmu_notifier_invalidate_range_end(&range);
			tlb_finish_mmu(&tlb);
		}
	}

	return ret;
}

/*
 * Reaps the address space of the given task.
 *
 * Returns true on success and false if none or only part of the address space
 * could be reclaimed, in which case the caller should retry later.
 */
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
	bool ret = true;

	if (!mmap_read_trylock(mm)) {
		trace_skip_task_reaping(tsk->pid);
		return false;
	}

	/*
	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
	 * under mmap_lock for reading because it serializes against the
	 * mmap_write_lock();mmap_write_unlock() cycle in exit_mmap().
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
		trace_skip_task_reaping(tsk->pid);
		goto out_unlock;
	}

	trace_start_task_reaping(tsk->pid);

	/* failed to reap part of the address space. Try again later */
	ret = __oom_reap_task_mm(mm);
	if (!ret)
		goto out_finish;

	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
			task_pid_nr(tsk), tsk->comm,
			K(get_mm_counter(mm, MM_ANONPAGES)),
			K(get_mm_counter(mm, MM_FILEPAGES)),
			K(get_mm_counter(mm, MM_SHMEMPAGES)));
out_finish:
	trace_finish_task_reaping(tsk->pid);
out_unlock:
	mmap_read_unlock(mm);

	return ret;
}

#define MAX_OOM_REAP_RETRIES 10
static void oom_reap_task(struct task_struct *tsk)
{
	int attempts = 0;
	struct mm_struct *mm = tsk->signal->oom_mm;

	/* Retry the mmap_read_trylock(mm) a few times */
	while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
		schedule_timeout_idle(HZ/10);

	if (attempts <= MAX_OOM_REAP_RETRIES ||
	    test_bit(MMF_OOM_SKIP, &mm->flags))
		goto done;

	pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
		task_pid_nr(tsk), tsk->comm);
	sched_show_task(tsk);
	debug_show_all_locks();

done:
	tsk->oom_reaper_list = NULL;

	/*
	 * Hide this mm from the OOM killer because it has either been reaped
	 * or somebody got stuck and cannot call mmap_write_unlock(mm).
	 */
	set_bit(MMF_OOM_SKIP, &mm->flags);

	/* Drop a reference taken by wake_oom_reaper */
	put_task_struct(tsk);
}

static int oom_reaper(void *unused)
{
	while (true) {
		struct task_struct *tsk = NULL;

		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
		spin_lock(&oom_reaper_lock);
		if (oom_reaper_list != NULL) {
			tsk = oom_reaper_list;
			oom_reaper_list = tsk->oom_reaper_list;
		}
		spin_unlock(&oom_reaper_lock);

		if (tsk)
			oom_reap_task(tsk);
	}

	return 0;
}

static void wake_oom_reaper(struct task_struct *tsk)
{
	/* mm is already queued? */
	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
		return;

	get_task_struct(tsk);

	spin_lock(&oom_reaper_lock);
	tsk->oom_reaper_list = oom_reaper_list;
	oom_reaper_list = tsk;
	spin_unlock(&oom_reaper_lock);
	trace_wake_reaper(tsk->pid);
	wake_up(&oom_reaper_wait);
}
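
/*
 * Note the queueing discipline: oom_reaper_list is a singly-linked LIFO
 * stack threaded through task_struct::oom_reaper_list. wake_oom_reaper()
 * pushes at the head and oom_reaper() pops from the head, so the most
 * recently queued victim is reaped first. The MMF_OOM_REAP_QUEUED bit
 * ensures each victim is queued at most once.
 */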

static int __init oom_init(void)
{
	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
	return 0;
}
subsys_initcall(oom_init)
#else
static inline void wake_oom_reaper(struct task_struct *tsk)
{
}
#endif /* CONFIG_MMU */

/**
 * mark_oom_victim - mark the given task as OOM victim
 * @tsk: task to mark
 *
 * Has to be called with oom_lock held and never after
 * oom has been disabled already.
 *
 * tsk->mm has to be non NULL and caller has to guarantee it is stable (either
 * under task_lock or operate on the current).
 */
static void mark_oom_victim(struct task_struct *tsk)
{
	struct mm_struct *mm = tsk->mm;

	WARN_ON(oom_killer_disabled);
	/* OOM killer might race with memcg OOM */
	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
		return;

	/* oom_mm is bound to the signal struct life time. */
	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
		mmgrab(tsk->signal->oom_mm);
		set_bit(MMF_OOM_VICTIM, &mm->flags);
	}

	/*
	 * Make sure that the task is woken up from uninterruptible sleep
	 * if it is frozen because the OOM killer wouldn't be able to free
	 * any memory and would livelock. freezing_slow_path will tell the
	 * freezer that TIF_MEMDIE tasks should be ignored.
	 */
	__thaw_task(tsk);
	atomic_inc(&oom_victims);
	trace_mark_victim(tsk->pid);
}

/**
 * exit_oom_victim - note the exit of an OOM victim
 */
void exit_oom_victim(void)
{
	clear_thread_flag(TIF_MEMDIE);

	if (!atomic_dec_return(&oom_victims))
		wake_up_all(&oom_victims_wait);
}

/**
 * oom_killer_enable - enable OOM killer
 */
void oom_killer_enable(void)
{
	oom_killer_disabled = false;
	pr_info("OOM killer enabled.\n");
}

/**
 * oom_killer_disable - disable OOM killer
 * @timeout: maximum timeout to wait for oom victims in jiffies
 *
 * Forces all page allocations to fail rather than trigger OOM killer.
 * Will block and wait until all OOM victims are killed or the given
 * timeout expires.
 *
 * The function cannot be called when there are runnable user tasks because
 * the userspace would see unexpected allocation failures as a result. Any
 * new usage of this function should be consulted with MM people.
 *
 * Returns true if successful and false if the OOM killer cannot be
 * disabled.
 */
bool oom_killer_disable(signed long timeout)
{
	signed long ret;

	/*
	 * Make sure to not race with an ongoing OOM killer. Check that the
	 * current is not killed (possibly due to sharing the victim's memory).
	 */
	if (mutex_lock_killable(&oom_lock))
		return false;
	oom_killer_disabled = true;
	mutex_unlock(&oom_lock);

	ret = wait_event_interruptible_timeout(oom_victims_wait,
			!atomic_read(&oom_victims), timeout);
	if (ret <= 0) {
		oom_killer_enable();
		return false;
	}
	pr_info("OOM killer disabled.\n");

	return true;
}

static inline bool __task_will_free_mem(struct task_struct *task)
{
	struct signal_struct *sig = task->signal;

	/*
	 * A coredumping process may sleep for an extended period in exit_mm(),
	 * so the oom killer cannot assume that the process will promptly exit
	 * and release memory.
	 */
	if (sig->flags & SIGNAL_GROUP_COREDUMP)
		return false;

	if (sig->flags & SIGNAL_GROUP_EXIT)
		return true;

	if (thread_group_empty(task) && (task->flags & PF_EXITING))
		return true;

	return false;
}

/*
 * Checks whether the given task is dying or exiting and likely to
 * release its address space. This means that all threads and processes
 * sharing the same mm have to be killed or exiting.
 * Caller has to make sure that task->mm is stable (hold task_lock or
 * it operates on the current).
 */
static bool task_will_free_mem(struct task_struct *task)
{
	struct mm_struct *mm = task->mm;
	struct task_struct *p;
	bool ret = true;

	/*
	 * Skip tasks without mm because they might have passed exit_mm and
	 * exit_oom_victim. oom_reaper could have rescued that but do not rely
	 * on that for now. We can consider find_lock_task_mm in future.
	 */
	if (!mm)
		return false;

	if (!__task_will_free_mem(task))
		return false;

	/*
	 * This task has already been drained by the oom reaper so there are
	 * only small chances it will free some more
	 */
	if (test_bit(MMF_OOM_SKIP, &mm->flags))
		return false;

	if (atomic_read(&mm->mm_users) <= 1)
		return true;

	/*
	 * Make sure that all tasks which share the mm with the given task
	 * are dying as well to make sure that a) nobody pins its mm and
	 * b) the task is also reapable by the oom reaper.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(task, p))
			continue;
		ret = __task_will_free_mem(p);
		if (!ret)
			break;
	}
	rcu_read_unlock();

	return ret;
}

static void __oom_kill_process(struct task_struct *victim, const char *message)
{
	struct task_struct *p;
	struct mm_struct *mm;
	bool can_oom_reap = true;

	p = find_lock_task_mm(victim);
	if (!p) {
		pr_info("%s: OOM victim %d (%s) is already exiting. Skip killing the task\n",
			message, task_pid_nr(victim), victim->comm);
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* Get a reference to safely compare mm after task_unlock(victim) */
	mm = victim->mm;
	mmgrab(mm);

	/* Raise event before sending signal: task reaper must see this */
	count_vm_event(OOM_KILL);
	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);

	/*
	 * We should send SIGKILL before granting access to memory reserves
	 * in order to prevent the OOM victim from depleting the memory
	 * reserves from the user space under its control.
	 */
	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
	mark_oom_victim(victim);
	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
		message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
		K(get_mm_counter(mm, MM_ANONPAGES)),
		K(get_mm_counter(mm, MM_FILEPAGES)),
		K(get_mm_counter(mm, MM_SHMEMPAGES)),
		from_kuid(&init_user_ns, task_uid(victim)),
		mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups, if
	 * any. They don't get access to memory reserves, though, to avoid
	 * depletion of all memory. This prevents mm->mmap_lock livelock when an
	 * oom killed thread cannot exit because it requires the semaphore and
	 * it's contended by another thread trying to allocate memory itself.
	 * That thread will now get access to memory reserves since it has a
	 * pending fatal signal.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(p, victim))
			continue;
		if (is_global_init(p)) {
			can_oom_reap = false;
			set_bit(MMF_OOM_SKIP, &mm->flags);
			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
				task_pid_nr(victim), victim->comm,
				task_pid_nr(p), p->comm);
			continue;
		}
		/*
		 * No kthread_use_mm() user needs to read from the userspace so
		 * we are ok to reap it.
		 */
		if (unlikely(p->flags & PF_KTHREAD))
			continue;
		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
	}
	rcu_read_unlock();

	if (can_oom_reap)
		wake_oom_reaper(victim);

	mmdrop(mm);
	put_task_struct(victim);
}
#undef K

/*
 * Kill the provided task unless it is secured by setting
 * oom_score_adj to OOM_SCORE_ADJ_MIN.
 */
static int oom_kill_memcg_member(struct task_struct *task, void *message)
{
	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
	    !is_global_init(task)) {
		get_task_struct(task);
		__oom_kill_process(task, message);
	}
	return 0;
}

static void oom_kill_process(struct oom_control *oc, const char *message)
{
	struct task_struct *victim = oc->chosen;
	struct mem_cgroup *oom_group;
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	/*
	 * If the task is already exiting, don't alarm the sysadmin or kill
	 * its children or threads, just give it access to memory reserves
	 * so it can die quickly
	 */
	task_lock(victim);
	if (task_will_free_mem(victim)) {
		mark_oom_victim(victim);
		wake_oom_reaper(victim);
		task_unlock(victim);
		put_task_struct(victim);
		return;
	}
	task_unlock(victim);

	if (__ratelimit(&oom_rs))
		dump_header(oc, victim);

	/*
	 * Do we need to kill the entire memory cgroup?
	 * Or even one of the ancestor memory cgroups?
	 * Check this out before killing the victim task.
	 */
	oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);

	__oom_kill_process(victim, message);

	/*
	 * If necessary, kill all tasks in the selected memory cgroup.
	 */
	if (oom_group) {
		mem_cgroup_print_oom_group(oom_group);
		mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member,
				      (void*)message);
		mem_cgroup_put(oom_group);
	}
}

/*
 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
 */
static void check_panic_on_oom(struct oom_control *oc)
{
	if (likely(!sysctl_panic_on_oom))
		return;
	if (sysctl_panic_on_oom != 2) {
		/*
		 * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
		 * does not panic for cpuset, mempolicy, or memcg allocation
		 * failures.
		 */
		if (oc->constraint != CONSTRAINT_NONE)
			return;
	}
	/* Do not panic for oom kills triggered by sysrq */
	if (is_sysrq_oom(oc))
		return;
	dump_header(oc, NULL);
	panic("Out of memory: %s panic_on_oom is enabled\n",
		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}

static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);

/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @oc: pointer to struct oom_control
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * or trying to be smart about which process to kill. Note that we
 * don't have to be perfect here, we just have to be good.
 */
bool out_of_memory(struct oom_control *oc)
{
	unsigned long freed = 0;

	if (oom_killer_disabled)
		return false;

	if (!is_memcg_oom(oc)) {
		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
		if (freed > 0)
			/* Got some memory back in the last second. */
			return true;
	}

	/*
	 * If current has a pending SIGKILL or is exiting, then automatically
	 * select it. The goal is to allow it to allocate so that it may
	 * quickly exit and free its memory.
	 */
	if (task_will_free_mem(current)) {
		mark_oom_victim(current);
		wake_oom_reaper(current);
		return true;
	}

	/*
	 * The OOM killer does not compensate for IO-less reclaim.
	 * pagefault_out_of_memory lost its gfp context so we have to
	 * make sure to exclude the 0 mask - all other users should have at
	 * least ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has
	 * to invoke the OOM killer even if it is a GFP_NOFS allocation.
	 */
	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
		return true;

	/*
	 * Check if there were limitations on the allocation (only relevant for
	 * NUMA and memcg) that may require different handling.
	 */
	oc->constraint = constrained_alloc(oc);
	if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
		oc->nodemask = NULL;
	check_panic_on_oom(oc);

	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
	    current->mm && !oom_unkillable_task(current) &&
	    oom_cpuset_eligible(current, oc) &&
	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
		get_task_struct(current);
		oc->chosen = current;
		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
		return true;
	}

	select_bad_process(oc);
	/* Found nothing?!?! */
	if (!oc->chosen) {
		dump_header(oc, NULL);
		pr_warn("Out of memory and no killable processes...\n");
		/*
		 * If we got here due to an actual allocation at the
		 * system level, we cannot survive this and will enter
		 * an endless loop in the allocator. Bail out now.
		 */
		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
			panic("System is deadlocked on memory\n");
	}
	if (oc->chosen && oc->chosen != (void *)-1UL)
		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
				 "Memory cgroup out of memory");
	return !!oc->chosen;
}

/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
 * killing is already in progress so do nothing.
 */
void pagefault_out_of_memory(void)
{
	struct oom_control oc = {
		.zonelist = NULL,
		.nodemask = NULL,
		.memcg = NULL,
		.gfp_mask = 0,
		.order = 0,
	};

	if (mem_cgroup_oom_synchronize(true))
		return;

	if (!mutex_trylock(&oom_lock))
		return;
	out_of_memory(&oc);
	mutex_unlock(&oom_lock);
}