LCOV - code coverage report
Current view: top level - mm - pagewalk.c (source / functions)
Test: landlock.info          Lines:     0 / 226 = 0.0 %
Date: 2021-04-22 12:43:58    Functions: 0 / 12  = 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : #include <linux/pagewalk.h>
       3             : #include <linux/highmem.h>
       4             : #include <linux/sched.h>
       5             : #include <linux/hugetlb.h>
       6             : 
       7             : /*
       8             :  * We want to know the real level where an entry is located, ignoring any
       9             :  * folding of levels which may be happening. For example if p4d is folded then
      10             :  * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
      11             :  */
      12           0 : static int real_depth(int depth)
      13             : {
      14           0 :         if (depth == 3 && PTRS_PER_PMD == 1)
      15             :                 depth = 2;
      16           0 :         if (depth == 2 && PTRS_PER_PUD == 1)
      17             :                 depth = 1;
      18           0 :         if (depth == 1 && PTRS_PER_P4D == 1)
      19           0 :                 depth = 0;
      20           0 :         return depth;
      21             : }
      22             : 
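As a concrete illustration of the folding above, assume a three-level configuration where both p4d and pud are folded (PTRS_PER_P4D == 1 and PTRS_PER_PUD == 1); the sketch below is illustrative, not part of this file:

    /*
     * Illustrative sketch, assuming PTRS_PER_P4D == 1 && PTRS_PER_PUD == 1:
     *
     *   real_depth(3) == 3   // the pmd level really exists
     *   real_depth(2) == 0   // pud folds into p4d, which folds into pgd
     *   real_depth(1) == 0   // p4d folds into pgd
     *
     * so a hole noticed while iterating (folded) puds is reported at the
     * pgd level, matching what the hardware actually walks.
     */
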
      23           0 : static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
      24             :                                 unsigned long end, struct mm_walk *walk)
      25             : {
      26           0 :         const struct mm_walk_ops *ops = walk->ops;
      27           0 :         int err = 0;
      28             : 
      29           0 :         for (;;) {
      30           0 :                 err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
      31           0 :                 if (err)
       32             :                         break;
      33           0 :                 if (addr >= end - PAGE_SIZE)
      34             :                         break;
      35           0 :                 addr += PAGE_SIZE;
      36           0 :                 pte++;
      37             :         }
      38           0 :         return err;
      39             : }
      40             : 
      41           0 : static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
      42             :                           struct mm_walk *walk)
      43             : {
      44           0 :         pte_t *pte;
      45           0 :         int err = 0;
      46           0 :         spinlock_t *ptl;
      47             : 
      48           0 :         if (walk->no_vma) {
      49           0 :                 pte = pte_offset_map(pmd, addr);
      50           0 :                 err = walk_pte_range_inner(pte, addr, end, walk);
      51           0 :                 pte_unmap(pte);
      52             :         } else {
      53           0 :                 pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
      54           0 :                 err = walk_pte_range_inner(pte, addr, end, walk);
      55           0 :                 pte_unmap_unlock(pte, ptl);
      56             :         }
      57             : 
      58           0 :         return err;
      59             : }
      60             : 
      61           0 : static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
      62             :                           struct mm_walk *walk)
      63             : {
      64           0 :         pmd_t *pmd;
      65           0 :         unsigned long next;
      66           0 :         const struct mm_walk_ops *ops = walk->ops;
      67           0 :         int err = 0;
      68           0 :         int depth = real_depth(3);
      69             : 
      70           0 :         pmd = pmd_offset(pud, addr);
      71           0 :         do {
      72           0 : again:
      73           0 :                 next = pmd_addr_end(addr, end);
      74           0 :                 if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
      75           0 :                         if (ops->pte_hole)
      76           0 :                                 err = ops->pte_hole(addr, next, depth, walk);
      77           0 :                         if (err)
      78             :                                 break;
      79           0 :                         continue;
      80             :                 }
      81             : 
      82           0 :                 walk->action = ACTION_SUBTREE;
      83             : 
      84             :                 /*
      85             :                  * This implies that each ->pmd_entry() handler
      86             :                  * needs to know about pmd_trans_huge() pmds
      87             :                  */
      88           0 :                 if (ops->pmd_entry)
      89           0 :                         err = ops->pmd_entry(pmd, addr, next, walk);
      90           0 :                 if (err)
      91             :                         break;
      92             : 
      93           0 :                 if (walk->action == ACTION_AGAIN)
      94           0 :                         goto again;
      95             : 
      96             :                 /*
      97             :                  * Check this here so we only break down trans_huge
      98             :                  * pages when we _need_ to
      99             :                  */
     100           0 :                 if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
     101           0 :                     walk->action == ACTION_CONTINUE ||
     102           0 :                     !(ops->pte_entry))
     103           0 :                         continue;
     104             : 
     105           0 :                 if (walk->vma) {
     106           0 :                         split_huge_pmd(walk->vma, pmd, addr);
     107           0 :                         if (pmd_trans_unstable(pmd))
     108           0 :                                 goto again;
     109             :                 }
     110             : 
     111           0 :                 err = walk_pte_range(pmd, addr, next, walk);
     112           0 :                 if (err)
     113             :                         break;
     114           0 :         } while (pmd++, addr = next, addr != end);
     115             : 
     116           0 :         return err;
     117             : }
     118             : 
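The comment above notes that every ->pmd_entry() handler must expect huge pmds. A minimal sketch of such a handler follows (my_pmd_entry is hypothetical; a real handler would also take the pmd lock):

    /* Hedged sketch: account huge pmds in place and set ACTION_CONTINUE so
     * the walker neither splits them nor descends to the pte level. */
    static int my_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
                            struct mm_walk *walk)
    {
            if (pmd_trans_huge(*pmd)) {
                    *(unsigned long *)walk->private += next - addr;
                    walk->action = ACTION_CONTINUE; /* skip split + pte walk */
            }
            return 0;
    }
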
     119           0 : static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
     120             :                           struct mm_walk *walk)
     121             : {
     122           0 :         pud_t *pud;
     123           0 :         unsigned long next;
     124           0 :         const struct mm_walk_ops *ops = walk->ops;
     125           0 :         int err = 0;
     126           0 :         int depth = real_depth(2);
     127             : 
     128           0 :         pud = pud_offset(p4d, addr);
     129           0 :         do {
      130           0 : again:
     131           0 :                 next = pud_addr_end(addr, end);
     132           0 :                 if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
     133           0 :                         if (ops->pte_hole)
     134           0 :                                 err = ops->pte_hole(addr, next, depth, walk);
     135           0 :                         if (err)
     136             :                                 break;
     137           0 :                         continue;
     138             :                 }
     139             : 
     140           0 :                 walk->action = ACTION_SUBTREE;
     141             : 
     142           0 :                 if (ops->pud_entry)
     143           0 :                         err = ops->pud_entry(pud, addr, next, walk);
     144           0 :                 if (err)
     145             :                         break;
     146             : 
     147           0 :                 if (walk->action == ACTION_AGAIN)
     148           0 :                         goto again;
     149             : 
     150           0 :                 if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
     151           0 :                     walk->action == ACTION_CONTINUE ||
     152           0 :                     !(ops->pmd_entry || ops->pte_entry))
     153           0 :                         continue;
     154             : 
     155           0 :                 if (walk->vma)
     156           0 :                         split_huge_pud(walk->vma, pud, addr);
     157           0 :                 if (pud_none(*pud))
     158           0 :                         goto again;
     159             : 
     160           0 :                 err = walk_pmd_range(pud, addr, next, walk);
     161           0 :                 if (err)
     162             :                         break;
     163           0 :         } while (pud++, addr = next, addr != end);
     164             : 
     165           0 :         return err;
     166             : }
     167             : 
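ACTION_AGAIN, consumed by the 'goto again' above, lets a callback that changed the entry ask for the slot to be re-read. A purely hypothetical sketch (my_zap_huge_pud does not exist; it stands in for whatever mutation the caller performs):

    /* Hedged sketch: mutate a huge pud, then have the walker re-evaluate
     * the slot from scratch rather than descend through a stale entry. */
    static int my_pud_entry(pud_t *pud, unsigned long addr, unsigned long next,
                            struct mm_walk *walk)
    {
            if (pud_leaf(*pud)) {
                    my_zap_huge_pud(walk->vma, pud, addr);  /* hypothetical */
                    walk->action = ACTION_AGAIN;    /* re-read this pud slot */
            }
            return 0;
    }
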
     168           0 : static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
     169             :                           struct mm_walk *walk)
     170             : {
     171           0 :         p4d_t *p4d;
     172           0 :         unsigned long next;
     173           0 :         const struct mm_walk_ops *ops = walk->ops;
     174           0 :         int err = 0;
     175           0 :         int depth = real_depth(1);
     176             : 
     177           0 :         p4d = p4d_offset(pgd, addr);
     178           0 :         do {
     179           0 :                 next = p4d_addr_end(addr, end);
     180           0 :                 if (p4d_none_or_clear_bad(p4d)) {
     181           0 :                         if (ops->pte_hole)
     182           0 :                                 err = ops->pte_hole(addr, next, depth, walk);
     183           0 :                         if (err)
     184             :                                 break;
     185           0 :                         continue;
     186             :                 }
     187           0 :                 if (ops->p4d_entry) {
     188           0 :                         err = ops->p4d_entry(p4d, addr, next, walk);
     189           0 :                         if (err)
     190             :                                 break;
     191             :                 }
     192           0 :                 if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
     193           0 :                         err = walk_pud_range(p4d, addr, next, walk);
     194           0 :                 if (err)
     195             :                         break;
     196           0 :         } while (p4d++, addr = next, addr != end);
     197             : 
     198           0 :         return err;
     199             : }
     200             : 
     201           0 : static int walk_pgd_range(unsigned long addr, unsigned long end,
     202             :                           struct mm_walk *walk)
     203             : {
     204           0 :         pgd_t *pgd;
     205           0 :         unsigned long next;
     206           0 :         const struct mm_walk_ops *ops = walk->ops;
     207           0 :         int err = 0;
     208             : 
     209           0 :         if (walk->pgd)
     210           0 :                 pgd = walk->pgd + pgd_index(addr);
     211             :         else
     212           0 :                 pgd = pgd_offset(walk->mm, addr);
     213           0 :         do {
     214           0 :                 next = pgd_addr_end(addr, end);
     215           0 :                 if (pgd_none_or_clear_bad(pgd)) {
     216             :                         if (ops->pte_hole)
     217             :                                 err = ops->pte_hole(addr, next, 0, walk);
     218             :                         if (err)
     219             :                                 break;
     220             :                         continue;
     221             :                 }
     222           0 :                 if (ops->pgd_entry) {
     223           0 :                         err = ops->pgd_entry(pgd, addr, next, walk);
     224           0 :                         if (err)
     225             :                                 break;
     226             :                 }
     227           0 :                 if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
     228           0 :                     ops->pte_entry)
     229           0 :                         err = walk_p4d_range(pgd, addr, next, walk);
     230           0 :                 if (err)
     231             :                         break;
     232           0 :         } while (pgd++, addr = next, addr != end);
     233             : 
     234           0 :         return err;
     235             : }
     236             : 
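Holes are reported through ->pte_hole() with the fold-corrected depth computed by real_depth(): 0 (pgd) up to 3 (pmd), or -1 where no level applies (hugetlb and VM_PFNMAP, below). A minimal hypothetical sketch:

    /* Hedged sketch: total up unmapped bytes, whatever level the hole is at. */
    static int my_pte_hole(unsigned long addr, unsigned long next, int depth,
                           struct mm_walk *walk)
    {
            *(unsigned long *)walk->private += next - addr;
            return 0;
    }
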
     237             : #ifdef CONFIG_HUGETLB_PAGE
     238             : static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
     239             :                                        unsigned long end)
     240             : {
     241             :         unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
     242             :         return boundary < end ? boundary : end;
     243             : }
     244             : 
     245             : static int walk_hugetlb_range(unsigned long addr, unsigned long end,
     246             :                               struct mm_walk *walk)
     247             : {
     248             :         struct vm_area_struct *vma = walk->vma;
     249             :         struct hstate *h = hstate_vma(vma);
     250             :         unsigned long next;
     251             :         unsigned long hmask = huge_page_mask(h);
     252             :         unsigned long sz = huge_page_size(h);
     253             :         pte_t *pte;
     254             :         const struct mm_walk_ops *ops = walk->ops;
     255             :         int err = 0;
     256             : 
     257             :         do {
     258             :                 next = hugetlb_entry_end(h, addr, end);
     259             :                 pte = huge_pte_offset(walk->mm, addr & hmask, sz);
     260             : 
     261             :                 if (pte)
     262             :                         err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
     263             :                 else if (ops->pte_hole)
     264             :                         err = ops->pte_hole(addr, next, -1, walk);
     265             : 
     266             :                 if (err)
     267             :                         break;
     268             :         } while (addr = next, addr != end);
     269             : 
     270             :         return err;
     271             : }
     272             : 
     273             : #else /* CONFIG_HUGETLB_PAGE */
     274             : static int walk_hugetlb_range(unsigned long addr, unsigned long end,
     275             :                               struct mm_walk *walk)
     276             : {
     277             :         return 0;
     278             : }
     279             : 
     280             : #endif /* CONFIG_HUGETLB_PAGE */
     281             : 
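For hugetlb vmas the walker calls ->hugetlb_entry() once per huge page, as sized by hugetlb_entry_end() above. A hypothetical sketch (my_hugetlb_entry is not part of this file):

    /* Hedged sketch: count bytes mapped by present hugetlb entries. @hmask
     * masks an address down to its huge page boundary. */
    static int my_hugetlb_entry(pte_t *pte, unsigned long hmask,
                                unsigned long addr, unsigned long next,
                                struct mm_walk *walk)
    {
            pte_t entry = huge_ptep_get(pte);

            if (pte_present(entry))
                    *(unsigned long *)walk->private += next - addr;
            return 0;
    }
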
     282             : /*
     283             :  * Decide whether we really walk over the current vma on [@start, @end)
     284             :  * or skip it via the returned value. Return 0 if we do walk over the
      285             :  * current vma, and return 1 if we skip the vma. A negative value means an
      286             :  * error, in which case we abort the current walk.
     287             :  */
     288           0 : static int walk_page_test(unsigned long start, unsigned long end,
     289             :                         struct mm_walk *walk)
     290             : {
     291           0 :         struct vm_area_struct *vma = walk->vma;
     292           0 :         const struct mm_walk_ops *ops = walk->ops;
     293             : 
     294           0 :         if (ops->test_walk)
     295           0 :                 return ops->test_walk(start, end, walk);
     296             : 
     297             :         /*
      298             :  * vma(VM_PFNMAP) doesn't have any valid struct pages behind the VM_PFNMAP
      299             :  * range, so we don't walk over it as we do for normal vmas. However,
      300             :  * some callers are interested in handling hole ranges and don't want to
      301             :  * simply ignore any single address range. Such users certainly define
      302             :  * their ->pte_hole() callbacks, so let's delegate handling of
      303             :  * vma(VM_PFNMAP) to them.
     304             :          */
     305           0 :         if (vma->vm_flags & VM_PFNMAP) {
     306           0 :                 int err = 1;
     307           0 :                 if (ops->pte_hole)
     308           0 :                         err = ops->pte_hole(start, end, -1, walk);
     309           0 :                 return err ? err : 1;
     310             :         }
     311             :         return 0;
     312             : }
     313             : 
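A caller can override this default policy with its own ->test_walk(). The hypothetical sketch below reproduces the skip-but-don't-abort behaviour for PFN mappings:

    /* Hedged sketch: 1 skips the vma, 0 walks it, negative aborts the walk. */
    static int my_test_walk(unsigned long start, unsigned long end,
                            struct mm_walk *walk)
    {
            if (walk->vma->vm_flags & VM_PFNMAP)
                    return 1;       /* no struct pages behind this vma: skip */
            return 0;               /* walk it normally */
    }
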
     314           0 : static int __walk_page_range(unsigned long start, unsigned long end,
     315             :                         struct mm_walk *walk)
     316             : {
     317           0 :         int err = 0;
     318           0 :         struct vm_area_struct *vma = walk->vma;
     319           0 :         const struct mm_walk_ops *ops = walk->ops;
     320             : 
     321           0 :         if (vma && ops->pre_vma) {
     322           0 :                 err = ops->pre_vma(start, end, walk);
     323           0 :                 if (err)
     324             :                         return err;
     325             :         }
     326             : 
     327           0 :         if (vma && is_vm_hugetlb_page(vma)) {
     328             :                 if (ops->hugetlb_entry)
     329             :                         err = walk_hugetlb_range(start, end, walk);
     330             :         } else
     331           0 :                 err = walk_pgd_range(start, end, walk);
     332             : 
     333           0 :         if (vma && ops->post_vma)
     334           0 :                 ops->post_vma(walk);
     335             : 
     336             :         return err;
     337             : }
     338             : 
     339             : /**
     340             :  * walk_page_range - walk page table with caller specific callbacks
     341             :  * @mm:         mm_struct representing the target process of page table walk
     342             :  * @start:      start address of the virtual address range
     343             :  * @end:        end address of the virtual address range
     344             :  * @ops:        operation to call during the walk
     345             :  * @private:    private data for callbacks' usage
     346             :  *
     347             :  * Recursively walk the page table tree of the process represented by @mm
      348             :  * within the virtual address range [@start, @end). During walking, we can do
      349             :  * some caller-specific work for each entry by setting up pmd_entry(),
      350             :  * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
      351             :  * callbacks, the associated entries/pages are just ignored.
     352             :  * The return values of these callbacks are commonly defined like below:
     353             :  *
      354             :  *  - 0  : the current entry was handled successfully; if the end address
      355             :  *         has not been reached yet, continue the walk.
      356             :  *  - >0 : the current entry was handled successfully; return to the caller
      357             :  *         with this caller-specific value.
      358             :  *  - <0 : handling the current entry failed; return to the caller
      359             :  *         with the error code.
     360             :  *
      361             :  * Before starting to walk the page table, some callers want to check whether
      362             :  * they really want to walk over the current vma, typically by checking
      363             :  * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
      364             :  * purpose.
     365             :  *
     366             :  * If operations need to be staged before and committed after a vma is walked,
     367             :  * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
     368             :  * since it is intended to handle commit-type operations, can't return any
     369             :  * errors.
     370             :  *
      371             :  * struct mm_walk keeps the current values of some common data like vma and
      372             :  * pmd, which are useful for access from the callbacks. If you want to pass
      373             :  * caller-specific data to the callbacks, @private should be helpful.
     374             :  *
     375             :  * Locking:
      376             :  *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
      377             :  *   because these functions traverse the vma list and/or access vma data.
     378             :  */
     379           0 : int walk_page_range(struct mm_struct *mm, unsigned long start,
     380             :                 unsigned long end, const struct mm_walk_ops *ops,
     381             :                 void *private)
     382             : {
     383           0 :         int err = 0;
     384           0 :         unsigned long next;
     385           0 :         struct vm_area_struct *vma;
     386           0 :         struct mm_walk walk = {
     387             :                 .ops            = ops,
     388             :                 .mm             = mm,
     389             :                 .private        = private,
     390             :         };
     391             : 
     392           0 :         if (start >= end)
     393             :                 return -EINVAL;
     394             : 
     395           0 :         if (!walk.mm)
     396             :                 return -EINVAL;
     397             : 
     398           0 :         mmap_assert_locked(walk.mm);
     399             : 
     400           0 :         vma = find_vma(walk.mm, start);
     401           0 :         do {
     402           0 :                 if (!vma) { /* after the last vma */
     403           0 :                         walk.vma = NULL;
     404           0 :                         next = end;
     405           0 :                 } else if (start < vma->vm_start) { /* outside vma */
     406           0 :                         walk.vma = NULL;
     407           0 :                         next = min(end, vma->vm_start);
     408             :                 } else { /* inside vma */
     409           0 :                         walk.vma = vma;
     410           0 :                         next = min(end, vma->vm_end);
     411           0 :                         vma = vma->vm_next;
     412             : 
     413           0 :                         err = walk_page_test(start, next, &walk);
     414           0 :                         if (err > 0) {
     415             :                                 /*
     416             :                                  * positive return values are purely for
     417             :                                  * controlling the pagewalk, so should never
     418             :                                  * be passed to the callers.
     419             :                                  */
     420           0 :                                 err = 0;
     421           0 :                                 continue;
     422             :                         }
     423           0 :                         if (err < 0)
     424             :                                 break;
     425             :                 }
     426           0 :                 if (walk.vma || walk.ops->pte_hole)
     427           0 :                         err = __walk_page_range(start, next, &walk);
     428           0 :                 if (err)
     429             :                         break;
     430           0 :         } while (start = next, start < end);
     431             :         return err;
     432             : }
     433             : 
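Putting the pieces together, a self-contained, hypothetical caller might look like the sketch below (nr_present_pte, nr_present_ops and nr_present are assumptions, not kernel APIs); note the mmap_lock taken around the call, as required by the locking comment above:

    /* Hedged usage sketch: count present ptes in [start, end) of @mm. */
    static int nr_present_pte(pte_t *pte, unsigned long addr,
                              unsigned long next, struct mm_walk *walk)
    {
            if (pte_present(*pte))
                    (*(unsigned long *)walk->private)++;
            return 0;
    }

    static const struct mm_walk_ops nr_present_ops = {
            .pte_entry      = nr_present_pte,
    };

    static unsigned long nr_present(struct mm_struct *mm, unsigned long start,
                                    unsigned long end)
    {
            unsigned long nr = 0;

            mmap_read_lock(mm);     /* walk_page_range() asserts this is held */
            walk_page_range(mm, start, end, &nr_present_ops, &nr);
            mmap_read_unlock(mm);
            return nr;
    }
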
     434             : /*
     435             :  * Similar to walk_page_range() but can walk any page tables even if they are
      436             :  * not backed by VMAs. Because 'unusual' entries may be walked, this function
      437             :  * will also not lock the PTEs for the pte_entry() callback. This is useful
      438             :  * for walking the kernel page tables or page tables for firmware.
     439             :  */
     440           0 : int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
     441             :                           unsigned long end, const struct mm_walk_ops *ops,
     442             :                           pgd_t *pgd,
     443             :                           void *private)
     444             : {
     445           0 :         struct mm_walk walk = {
     446             :                 .ops            = ops,
     447             :                 .mm             = mm,
     448             :                 .pgd            = pgd,
     449             :                 .private        = private,
     450             :                 .no_vma         = true
     451             :         };
     452             : 
     453           0 :         if (start >= end || !walk.mm)
     454             :                 return -EINVAL;
     455             : 
     456           0 :         mmap_assert_locked(walk.mm);
     457             : 
     458           0 :         return __walk_page_range(start, end, &walk);
     459             : }
     460             : 
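This is how ptdump-style users walk the kernel's own page tables. A hypothetical wrapper sketch (dump_kernel_range is an assumption; the ops come from the caller):

    /* Hedged sketch: walk init_mm without vmas. Passing a NULL pgd makes the
     * walker start from init_mm's own pgd; the mm's lock still has to be
     * held to satisfy mmap_assert_locked() above. */
    static int dump_kernel_range(unsigned long start, unsigned long end,
                                 const struct mm_walk_ops *ops)
    {
            int err;

            mmap_read_lock(&init_mm);
            err = walk_page_range_novma(&init_mm, start, end, ops, NULL, NULL);
            mmap_read_unlock(&init_mm);
            return err;
    }
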
     461           0 : int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
     462             :                 void *private)
     463             : {
     464           0 :         struct mm_walk walk = {
     465             :                 .ops            = ops,
     466           0 :                 .mm             = vma->vm_mm,
     467             :                 .vma            = vma,
     468             :                 .private        = private,
     469             :         };
     470           0 :         int err;
     471             : 
     472           0 :         if (!walk.mm)
     473             :                 return -EINVAL;
     474             : 
     475           0 :         mmap_assert_locked(walk.mm);
     476             : 
     477           0 :         err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
     478           0 :         if (err > 0)
     479             :                 return 0;
     480           0 :         if (err < 0)
     481             :                 return err;
     482           0 :         return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
     483             : }
     484             : 
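Used with the find_dirty_pte sketch from earlier, walk_page_vma() gives a one-vma scan; scan_vma_for_dirty is hypothetical and the caller must already hold mmap_lock:

    /* Hedged sketch: returns 1 if a dirty pte was found (stored in *where),
     * 0 if none, negative on error. */
    static const struct mm_walk_ops dirty_scan_ops = {
            .pte_entry      = find_dirty_pte,
    };

    static int scan_vma_for_dirty(struct vm_area_struct *vma,
                                  unsigned long *where)
    {
            return walk_page_vma(vma, &dirty_scan_ops, where);
    }
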
     485             : /**
     486             :  * walk_page_mapping - walk all memory areas mapped into a struct address_space.
     487             :  * @mapping: Pointer to the struct address_space
     488             :  * @first_index: First page offset in the address_space
     489             :  * @nr: Number of incremental page offsets to cover
     490             :  * @ops:        operation to call during the walk
     491             :  * @private:    private data for callbacks' usage
     492             :  *
     493             :  * This function walks all memory areas mapped into a struct address_space.
     494             :  * The walk is limited to only the given page-size index range, but if
     495             :  * the index boundaries cross a huge page-table entry, that entry will be
     496             :  * included.
     497             :  *
     498             :  * Also see walk_page_range() for additional information.
     499             :  *
     500             :  * Locking:
      501             :  *   This function can't require that the struct mm_struct::mmap_lock is held,
      502             :  *   since @mapping may be mapped by multiple processes. Instead,
      503             :  *   @mapping->i_mmap_rwsem must be held. This might have implications in the
      504             :  *   callbacks, and it's up to the caller to ensure that the
      505             :  *   struct mm_struct::mmap_lock is not needed.
     506             :  *
     507             :  *   Also this means that a caller can't rely on the struct
     508             :  *   vm_area_struct::vm_flags to be constant across a call,
     509             :  *   except for immutable flags. Callers requiring this shouldn't use
     510             :  *   this function.
     511             :  *
      512             :  * Return: 0 on success, a negative error code on failure, or a positive
      513             :  * number on caller-defined premature termination.
     514             :  */
     515           0 : int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
     516             :                       pgoff_t nr, const struct mm_walk_ops *ops,
     517             :                       void *private)
     518             : {
     519           0 :         struct mm_walk walk = {
     520             :                 .ops            = ops,
     521             :                 .private        = private,
     522             :         };
     523           0 :         struct vm_area_struct *vma;
     524           0 :         pgoff_t vba, vea, cba, cea;
     525           0 :         unsigned long start_addr, end_addr;
     526           0 :         int err = 0;
     527             : 
     528           0 :         lockdep_assert_held(&mapping->i_mmap_rwsem);
     529           0 :         vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
     530             :                                   first_index + nr - 1) {
     531             :                 /* Clip to the vma */
     532           0 :                 vba = vma->vm_pgoff;
     533           0 :                 vea = vba + vma_pages(vma);
     534           0 :                 cba = first_index;
     535           0 :                 cba = max(cba, vba);
     536           0 :                 cea = first_index + nr;
     537           0 :                 cea = min(cea, vea);
     538             : 
     539           0 :                 start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
     540           0 :                 end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
     541           0 :                 if (start_addr >= end_addr)
     542           0 :                         continue;
     543             : 
     544           0 :                 walk.vma = vma;
     545           0 :                 walk.mm = vma->vm_mm;
     546             : 
     547           0 :                 err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
     548           0 :                 if (err > 0) {
     549             :                         err = 0;
     550             :                         break;
     551           0 :                 } else if (err < 0)
     552             :                         break;
     553             : 
     554           0 :                 err = __walk_page_range(start_addr, end_addr, &walk);
     555           0 :                 if (err)
     556             :                         break;
     557             :         }
     558             : 
     559           0 :         return err;
     560             : }
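
A hypothetical caller sketch, taking i_mmap_rwsem for reading as the locking comment above requires (scan_file_range is an assumption):

    /* Hedged sketch: walk every vma that maps @nr pages of @mapping starting
     * at @first. lockdep_assert_held() above is satisfied by a read lock. */
    static int scan_file_range(struct address_space *mapping, pgoff_t first,
                               pgoff_t nr, const struct mm_walk_ops *ops,
                               void *private)
    {
            int err;

            i_mmap_lock_read(mapping);
            err = walk_page_mapping(mapping, first, nr, ops, private);
            i_mmap_unlock_read(mapping);
            return err;
    }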

Generated by: LCOV version 1.14