LCOV - code coverage report
Current view: top level - mm - page_reporting.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 0 129 0.0 %
Date: 2021-04-22 12:43:58 Functions: 0 8 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : #include <linux/mm.h>
       3             : #include <linux/mmzone.h>
       4             : #include <linux/page_reporting.h>
       5             : #include <linux/gfp.h>
       6             : #include <linux/export.h>
       7             : #include <linux/delay.h>
       8             : #include <linux/scatterlist.h>
       9             : 
      10             : #include "page_reporting.h"
      11             : #include "internal.h"
      12             : 
      13             : #define PAGE_REPORTING_DELAY    (2 * HZ) /* delay passed to schedule_delayed_work() for each pass */
      14             : static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly; /* registered device, read under RCU; NULL when none */
      15             : 
      16             : enum { /* values stored in prdev->state */
      17             :         PAGE_REPORTING_IDLE = 0, /* no request pending; the next request schedules the work */
      18             :         PAGE_REPORTING_REQUESTED, /* a pass was requested, or a pass ran out of budget */
      19             :         PAGE_REPORTING_ACTIVE /* set by page_reporting_process() when a pass starts */
      20             : };
      21             : 
      22             : /*
                     :  * Record a page reporting request on @prdev.
                     :  *
                     :  * Moves prdev->state to PAGE_REPORTING_REQUESTED and, only when the
                     :  * previous state was PAGE_REPORTING_IDLE, schedules prdev->work to run
                     :  * after PAGE_REPORTING_DELAY.
                     :  */
      23             : static void
      24           0 : __page_reporting_request(struct page_reporting_dev_info *prdev)
      25             : {
      26           0 :         unsigned int state;
      27             : 
      28             :         /* Cheap read first: if a request is already recorded we are done */
      29           0 :         state = atomic_read(&prdev->state);
      30           0 :         if (state == PAGE_REPORTING_REQUESTED)
      31             :                 return;
      32             : 
      33             :         /*
      34             :          * Record the request and look at the state it replaced. If that
      35             :          * was not PAGE_REPORTING_IDLE (e.g. a pass is active) there is
      36             :          * nothing more to do here; page_reporting_process() reschedules
      37             :          * itself when it finds the state set to REQUESTED at the end.
                     :          */
      37           0 :         state = atomic_xchg(&prdev->state, PAGE_REPORTING_REQUESTED);
      38           0 :         if (state != PAGE_REPORTING_IDLE)
      39             :                 return;
      40             : 
      41             :         /*
      42             :          * Delay the start of work to allow a sizable queue to build. For
      43             :          * now we are limiting this to running no more than once every
      44             :          * couple of seconds.
      45             :          */
      46           0 :         schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
      47             : }
      48             : 
      49             : /*
                     :  * Notify the registered device, if any, of a free page reporting
                     :  * request. Takes no arguments and returns nothing; when no device is
                     :  * registered the call does nothing.
                     :  */
      50           0 : void __page_reporting_notify(void)
      51             : {
      52           0 :         struct page_reporting_dev_info *prdev;
      53             : 
      54             :         /*
      55             :          * We use RCU to protect the pr_dev_info pointer. In almost all
      56             :          * cases this should be present, however in the unlikely case of
      57             :          * a shutdown this will be NULL and we should exit.
      58             :          */
      59           0 :         rcu_read_lock();
      60           0 :         prdev = rcu_dereference(pr_dev_info);
      61           0 :         if (likely(prdev))
      62           0 :                 __page_reporting_request(prdev); /* called inside the RCU read-side section */
      63             : 
      64           0 :         rcu_read_unlock();
      65           0 : }
      66             : /*
                     :  * Put every page held in the scatterlist @sgl back through
                     :  * __putback_isolated_page(), then reinitialize @sgl so it can be reused.
                     :  *
                     :  * @prdev:    reporting device (not referenced in the body)
                     :  * @sgl:      first scatterlist entry to drain; walked with sg_next()
                     :  * @nents:    number of entries handed to sg_init_table() afterwards
                     :  * @reported: true if the report() call for these pages succeeded
                     :  *
                     :  * Both callers in this file hold zone->lock around this call.
                     :  */
      67             : static void
      68           0 : page_reporting_drain(struct page_reporting_dev_info *prdev,
      69             :                      struct scatterlist *sgl, unsigned int nents, bool reported)
      70             : {
      71           0 :         struct scatterlist *sg = sgl;
      72             : 
      73             :         /*
      74             :          * Drain the now reported pages back into their respective
      75             :          * free lists/areas. We assume at least one page is populated.
      76             :          */
      77           0 :         do {
      78           0 :                 struct page *page = sg_page(sg);
      79           0 :                 int mt = get_pageblock_migratetype(page);
      80           0 :                 unsigned int order = get_order(sg->length); /* length was set to PAGE_SIZE << order when queued */
      81             : 
      82           0 :                 __putback_isolated_page(page, order, mt);
      83             : 
      84             :                 /* If the pages were not reported due to error skip flagging */
      85           0 :                 if (!reported)
      86           0 :                         continue; /* in a do/while this jumps to the condition, so sg still advances */
      87             : 
      88             :                 /*
      89             :                  * If page was not commingled with another page we can
      90             :                  * consider the result to be "reported" since the page
      91             :                  * hasn't been modified, otherwise we will need to
      92             :                  * report on the new larger page when we make our way
      93             :                  * up to that higher order.
      94             :                  */
      95           0 :                 if (PageBuddy(page) && buddy_order(page) == order)
      96           0 :                         __SetPageReported(page);
      97           0 :         } while ((sg = sg_next(sg)));
      98             : 
      99             :         /* reinitialize scatterlist now that it is empty */
     100           0 :         sg_init_table(sgl, nents);
     101           0 : }
     102             : 
     103             : /*
     104             :  * The page reporting cycle consists of 4 stages, fill, report, drain, and
     105             :  * idle. We will cycle through the first 3 stages until we cannot obtain a
     106             :  * full scatterlist of pages, in that case we will switch to idle.
                     :  *
                     :  * This function handles one free list, zone->free_area[@order].free_list[@mt].
                     :  *
                     :  * @prdev:  device whose report() callback receives each full scatterlist
                     :  * @zone:   zone being processed; zone->lock is taken and released here
                     :  * @order:  order of the free area to scan
                     :  * @mt:     migratetype index of the free list to scan
                     :  * @sgl:    scatterlist of PAGE_REPORTING_CAPACITY entries, filled from
                     :  *          the end towards index 0
                     :  * @offset: in/out; index of the lowest populated entry in @sgl, so
                     :  *          entries [*offset, PAGE_REPORTING_CAPACITY) hold pages and 0
                     :  *          means the list is full. Carried over between calls.
                     :  *
                     :  * Returns 0, or the non-zero value returned by prdev->report().
     107             :  */
     108             : static int
     109           0 : page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
     110             :                      unsigned int order, unsigned int mt,
     111             :                      struct scatterlist *sgl, unsigned int *offset)
     112             : {
     113           0 :         struct free_area *area = &zone->free_area[order];
     114           0 :         struct list_head *list = &area->free_list[mt];
     115           0 :         unsigned int page_len = PAGE_SIZE << order;
     116           0 :         struct page *page, *next;
     117           0 :         long budget;
     118           0 :         int err = 0;
     119             : 
     120             :         /*
     121             :          * Perform early check, if free area is empty there is
     122             :          * nothing to process so we can skip this free_list.
                     :          * Note this check is made before zone->lock is taken.
     123             :          */
     124           0 :         if (list_empty(list))
     125             :                 return err;
     126             : 
     127           0 :         spin_lock_irq(&zone->lock);
     128             : 
     129             :         /*
     130             :          * Limit how many calls we will be making to the page reporting
     131             :          * device for this list. By doing this we avoid processing any
     132             :          * given list for too long.
     133             :          *
     134             :          * The current value used allows us enough calls to process over a
     135             :          * sixteenth of the current list plus one additional call to handle
     136             :          * any pages that may have already been present from the previous
     137             :          * list processed. This should result in us reporting all pages on
     138             :          * an idle system in about 30 seconds.
     139             :          *
     140             :          * The division here should be cheap since PAGE_REPORTING_CAPACITY
     141             :          * should always be a power of 2.
     142             :          */
     143           0 :         budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);
     144             : 
     145             :         /* loop through free list adding unreported pages to sg list */
     146           0 :         list_for_each_entry_safe(page, next, list, lru) {
     147             :                 /* We are going to skip over the reported pages. */
     148           0 :                 if (PageReported(page))
     149           0 :                         continue;
     150             : 
     151             :                 /*
     152             :                  * If we fully consumed our budget then update our
     153             :                  * state to indicate that we are requesting additional
     154             :                  * processing and exit this list. budget is decremented once
                     :                  * per report() call and the test is "< 0", which is what
                     :                  * permits the one additional call described above.
     155             :                  */
     156           0 :                 if (budget < 0) {
     157           0 :                         atomic_set(&prdev->state, PAGE_REPORTING_REQUESTED);
     158           0 :                         next = page; /* leftover rotation below starts at this page */
     159           0 :                         break;
     160             :                 }
     161             : 
     162             :                 /* Attempt to pull page from list and place in scatterlist */
     163           0 :                 if (*offset) {
     164           0 :                         if (!__isolate_free_page(page, order)) {
     165             :                                 next = page; /* isolation failed: stop scanning this list */
     166             :                                 break;
     167             :                         }
     168             : 
     169             :                         /* Add page to scatter list */
     170           0 :                         --(*offset);
     171           0 :                         sg_set_page(&sgl[*offset], page, page_len, 0);
     172             : 
     173           0 :                         continue;
     174             :                 }
     175             : 
     176             :                 /*
     177             :                  * Make the first non-reported page in the free list
     178             :                  * the new head of the free list before we release the
     179             :                  * zone lock.
     180             :                  */
     181           0 :                 if (!list_is_first(&page->lru, list))
     182           0 :                         list_rotate_to_front(&page->lru, list);
     183             : 
     184             :                 /* release lock before waiting on report processing */
     185           0 :                 spin_unlock_irq(&zone->lock);
     186             : 
     187             :                 /* begin processing pages in local list */
     188           0 :                 err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);
     189             : 
     190             :                 /* reset offset since the full list was reported */
     191           0 :                 *offset = PAGE_REPORTING_CAPACITY;
     192             : 
     193             :                 /* update budget to reflect call to report function */
     194           0 :                 budget--;
     195             : 
     196             :                 /* reacquire zone lock and resume processing */
     197           0 :                 spin_lock_irq(&zone->lock);
     198             : 
     199             :                 /* flush reported pages from the sg list */
     200           0 :                 page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);
     201             : 
     202             :                 /*
     203             :                  * Reset next to first entry, the old next isn't valid
     204             :                  * since we dropped the lock to report the pages
     205             :                  */
     206           0 :                 next = list_first_entry(list, struct page, lru);
     207             : 
     208             :                 /* exit on error */
     209           0 :                 if (err)
     210             :                         break;
     211             :         }
     212             : 
     213             :         /* Rotate any leftover pages to the head of the freelist */
     214           0 :         if (!list_entry_is_head(next, list, lru) && !list_is_first(&next->lru, list))
     215           0 :                 list_rotate_to_front(&next->lru, list);
     216             : 
     217           0 :         spin_unlock_irq(&zone->lock);
     218             : 
     219           0 :         return err;
     220             : }
     221             : /*
                     :  * Run page_reporting_cycle() over every order >= PAGE_REPORTING_MIN_ORDER
                     :  * and every non-isolate migratetype of @zone, then report and drain any
                     :  * pages still sitting in @sgl.
                     :  *
                     :  * @prdev: device whose report() callback is invoked
                     :  * @sgl:   scatterlist of PAGE_REPORTING_CAPACITY entries, expected empty
                     :  *         on entry (offset starts at PAGE_REPORTING_CAPACITY)
                     :  * @zone:  zone to process
                     :  *
                     :  * Returns 0 on success or when the zone is skipped by the watermark
                     :  * check, otherwise the error returned by prdev->report().
                     :  */
     222             : static int
     223           0 : page_reporting_process_zone(struct page_reporting_dev_info *prdev,
     224             :                             struct scatterlist *sgl, struct zone *zone)
     225             : {
     226           0 :         unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
     227           0 :         unsigned long watermark;
     228           0 :         int err = 0;
     229             : 
     230             :         /* Generate minimum watermark to be able to guarantee progress */
     231           0 :         watermark = low_wmark_pages(zone) +
     232             :                     (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER);
     233             : 
     234             :         /*
     235             :          * Skip this zone (returning 0, not an error) if it does not
     236             :          * have enough free memory to satisfy the watermark above.
     237             :          */
     238           0 :         if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
     239             :                 return err;
     240             : 
     241             :         /* Process each free list starting from lowest order/mt */
     242           0 :         for (order = PAGE_REPORTING_MIN_ORDER; order < MAX_ORDER; order++) {
     243           0 :                 for (mt = 0; mt < MIGRATE_TYPES; mt++) {
     244             :                         /* We do not pull pages from the isolate free list */
     245           0 :                         if (is_migrate_isolate(mt))
     246             :                                 continue;
     247             : 
     248           0 :                         err = page_reporting_cycle(prdev, zone, order, mt,
     249             :                                                    sgl, &offset);
     250           0 :                         if (err)
     251           0 :                                 return err;
     252             :                 }
     253             :         }
     254             : 
     255             :         /* report the leftover pages before going idle */
     256           0 :         leftover = PAGE_REPORTING_CAPACITY - offset;
     257           0 :         if (leftover) {
     258           0 :                 sgl = &sgl[offset]; /* populated entries start at index offset */
     259           0 :                 err = prdev->report(prdev, sgl, leftover);
     260             : 
     261             :                 /* flush any remaining pages out from the last report */
     262           0 :                 spin_lock_irq(&zone->lock);
     263           0 :                 page_reporting_drain(prdev, sgl, leftover, !err);
     264           0 :                 spin_unlock_irq(&zone->lock);
     265             :         }
     266             : 
     267             :         return err;
     268             : }
     269             : /*
                     :  * Delayed-work handler installed by page_reporting_register(): performs
                     :  * one reporting pass over all zones for the page_reporting_dev_info that
                     :  * embeds @work, then either goes idle or reschedules itself.
                     :  */
     270           0 : static void page_reporting_process(struct work_struct *work)
     271             : {
     272           0 :         struct delayed_work *d_work = to_delayed_work(work);
     273           0 :         struct page_reporting_dev_info *prdev =
     274           0 :                 container_of(d_work, struct page_reporting_dev_info, work);
     275           0 :         int err = 0, state = PAGE_REPORTING_ACTIVE;
     276           0 :         struct scatterlist *sgl;
     277           0 :         struct zone *zone;
     278             : 
     279             :         /*
     280             :          * Change the state to "Active" so that we can track whether
     281             :          * anyone requests page reporting after we complete our pass. If
     282             :          * the state is not altered by the end of the pass we will switch
     283             :          * to idle and quit scheduling reporting runs.
     284             :          */
     285           0 :         atomic_set(&prdev->state, state);
     286             : 
     287             :         /* allocate scatterlist to store pages being reported on */
     288           0 :         sgl = kmalloc_array(PAGE_REPORTING_CAPACITY, sizeof(*sgl), GFP_KERNEL);
     289           0 :         if (!sgl)
     290           0 :                 goto err_out; /* no pass is run; state handling below still applies */
     291             : 
     292           0 :         sg_init_table(sgl, PAGE_REPORTING_CAPACITY);
     293             : 
     294           0 :         for_each_zone(zone) {
     295           0 :                 err = page_reporting_process_zone(prdev, sgl, zone);
     296           0 :                 if (err)
     297             :                         break; /* stop at the first zone that reports an error */
     298             :         }
     299             : 
     300           0 :         kfree(sgl);
     301           0 : err_out:
     302             :         /*
     303             :          * If the state has reverted back to requested then there may be
     304             :          * additional pages to be processed. We will defer for 2s to allow
     305             :          * more pages to accumulate. Otherwise the cmpxchg below finds the
                     :          * state still ACTIVE and replaces it with IDLE.
     306             :          */
     307           0 :         state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE);
     308           0 :         if (state == PAGE_REPORTING_REQUESTED)
     309           0 :                 schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
     310           0 : }
     311             : 
     312             : static DEFINE_MUTEX(page_reporting_mutex); /* held across page_reporting_register()/unregister() */
     313             : DEFINE_STATIC_KEY_FALSE(page_reporting_enabled); /* enabled on the first successful registration */
     314             : /*
                     :  * Register @prdev as the page reporting device.
                     :  *
                     :  * Initializes prdev->state and prdev->work, requests an initial
                     :  * reporting pass, publishes @prdev through pr_dev_info and enables the
                     :  * page_reporting_enabled static key if it is not already enabled.
                     :  *
                     :  * Returns 0 on success, or -EBUSY if a device is already registered.
                     :  */
     315           0 : int page_reporting_register(struct page_reporting_dev_info *prdev)
     316             : {
     317           0 :         int err = 0;
     318             : 
     319           0 :         mutex_lock(&page_reporting_mutex);
     320             : 
     321             :         /* nothing to do if already in use */
     322           0 :         if (rcu_access_pointer(pr_dev_info)) {
     323           0 :                 err = -EBUSY;
     324           0 :                 goto err_out;
     325             :         }
     326             : 
     327             :         /* initialize state and work structures */
     328           0 :         atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
     329           0 :         INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);
     330             : 
     331             :         /* Begin initial flush of zones */
     332           0 :         __page_reporting_request(prdev); /* state is IDLE here, so this schedules the delayed work */
     333             : 
     334             :         /* Assign device to allow notifications */
     335           0 :         rcu_assign_pointer(pr_dev_info, prdev);
     336             : 
     337             :         /* enable page reporting notification */
     338           0 :         if (!static_key_enabled(&page_reporting_enabled)) {
     339           0 :                 static_branch_enable(&page_reporting_enabled);
     340           0 :                 pr_info("Free page reporting enabled\n");
     341             :         }
     342           0 : err_out:
     343           0 :         mutex_unlock(&page_reporting_mutex);
     344             : 
     345           0 :         return err;
     346             : }
     347             : EXPORT_SYMBOL_GPL(page_reporting_register);
     348             : /*
                     :  * Unregister @prdev if it is the currently registered device; otherwise
                     :  * do nothing. Clears pr_dev_info, waits for RCU readers, and cancels
                     :  * the delayed work. The page_reporting_enabled static key is left as is.
                     :  */
     349           0 : void page_reporting_unregister(struct page_reporting_dev_info *prdev)
     350             : {
     351           0 :         mutex_lock(&page_reporting_mutex);
     352             : 
     353           0 :         if (rcu_access_pointer(pr_dev_info) == prdev) {
     354             :                 /* Disable page reporting notification */
     355           0 :                 RCU_INIT_POINTER(pr_dev_info, NULL);
     356           0 :                 synchronize_rcu(); /* wait for readers in __page_reporting_notify() to finish */
     357             : 
     358             :                 /* Flush any existing work, and lock it out */
     359           0 :                 cancel_delayed_work_sync(&prdev->work);
     360             :         }
     361             : 
     362           0 :         mutex_unlock(&page_reporting_mutex);
     363           0 : }
     364             : EXPORT_SYMBOL_GPL(page_reporting_unregister);

Generated by: LCOV version 1.14