LCOV - code coverage report
Current view: top level - arch/x86/mm - pgtable.c
Test: landlock.info
Date: 2021-04-22 12:43:58
                          Hit     Total    Coverage
Lines:                    110     231      47.6 %
Functions:                18      33       54.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : #include <linux/mm.h>
       3             : #include <linux/gfp.h>
       4             : #include <linux/hugetlb.h>
       5             : #include <asm/pgalloc.h>
       6             : #include <asm/tlb.h>
       7             : #include <asm/fixmap.h>
       8             : #include <asm/mtrr.h>
       9             : 
      10             : #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
      11             : phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
      12             : EXPORT_SYMBOL(physical_mask);
      13             : #endif
      14             : 
      15             : #ifdef CONFIG_HIGHPTE
      16             : #define PGTABLE_HIGHMEM __GFP_HIGHMEM
      17             : #else
      18             : #define PGTABLE_HIGHMEM 0
      19             : #endif
      20             : 
      21             : #ifndef CONFIG_PARAVIRT
      22             : static inline
      23             : void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
      24             : {
      25             :         tlb_remove_page(tlb, table);
      26             : }
      27             : #endif
      28             : 
      29             : gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
      30             : 
      31       45080 : pgtable_t pte_alloc_one(struct mm_struct *mm)
      32             : {
      33       45080 :         return __pte_alloc_one(mm, __userpte_alloc_gfp);
      34             : }
      35             : 
      36           0 : static int __init setup_userpte(char *arg)
      37             : {
      38           0 :         if (!arg)
      39             :                 return -EINVAL;
      40             : 
      41             :         /*
      42             :          * "userpte=nohigh" disables allocation of user pagetables in
      43             :          * high memory.
      44             :          */
      45           0 :         if (strcmp(arg, "nohigh") == 0)
      46           0 :                 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
      47             :         else
      48             :                 return -EINVAL;
      49           0 :         return 0;
      50             : }
      51             : early_param("userpte", setup_userpte);
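
The effect of the parameter on the allocation mask can be sketched as follows
(illustrative only, not part of pgtable.c; the mask only matters on 32-bit
kernels built with CONFIG_HIGHPTE):

        /* default: user PTE pages may come from highmem */
        gfp_t gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;

        /* after booting with "userpte=nohigh" on the command line: */
        gfp &= ~__GFP_HIGHMEM;          /* plain GFP_PGTABLE_USER */
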
      52             : 
      53       39889 : void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
      54             : {
      55       39889 :         pgtable_pte_page_dtor(pte);
      56       39889 :         paravirt_release_pte(page_to_pfn(pte));
      57       39889 :         paravirt_tlb_remove_table(tlb, pte);
      58       39889 : }
      59             : 
      60             : #if CONFIG_PGTABLE_LEVELS > 2
      61       22771 : void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
      62             : {
      63       22771 :         struct page *page = virt_to_page(pmd);
      64       22771 :         paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
      65             :         /*
      66             :          * NOTE! For PAE, any changes to the top page-directory-pointer-table
      67             :          * entries need a full cr3 reload to flush.
      68             :          */
      69             : #ifdef CONFIG_X86_PAE
      70             :         tlb->need_flush_all = 1;
      71             : #endif
      72       22771 :         pgtable_pmd_page_dtor(page);
      73       22771 :         paravirt_tlb_remove_table(tlb, page);
      74       22771 : }
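
The need_flush_all flag set above is honored when the mmu_gather is flushed;
roughly (a simplified sketch modeled on the x86 tlb_flush() in asm/tlb.h, not
part of this file):

        static inline void tlb_flush(struct mmu_gather *tlb)
        {
                unsigned long start = 0UL, end = TLB_FLUSH_ALL;

                /* need_flush_all (or a full-mm teardown) forces a full
                 * flush, which on x86 amounts to a cr3 reload */
                if (!tlb->fullmm && !tlb->need_flush_all) {
                        start = tlb->start;
                        end = tlb->end;
                }
                /* stride is simplified here; the real code derives it
                 * from the gather */
                flush_tlb_mm_range(tlb->mm, start, end, PAGE_SHIFT,
                                   tlb->freed_tables);
        }
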
      75             : 
      76             : #if CONFIG_PGTABLE_LEVELS > 3
      77       18410 : void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
      78             : {
      79       18410 :         paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
      80       18410 :         paravirt_tlb_remove_table(tlb, virt_to_page(pud));
      81       18410 : }
      82             : 
      83             : #if CONFIG_PGTABLE_LEVELS > 4
      84             : void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
      85             : {
      86             :         paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
      87             :         paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
      88             : }
      89             : #endif  /* CONFIG_PGTABLE_LEVELS > 4 */
      90             : #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
      91             : #endif  /* CONFIG_PGTABLE_LEVELS > 2 */
      92             : 
      93       10325 : static inline void pgd_list_add(pgd_t *pgd)
      94             : {
      95       10325 :         struct page *page = virt_to_page(pgd);
      96             : 
      97       10325 :         list_add(&page->lru, &pgd_list);
      98       10325 : }
      99             : 
     100       10301 : static inline void pgd_list_del(pgd_t *pgd)
     101             : {
     102       10301 :         struct page *page = virt_to_page(pgd);
     103             : 
     104       10301 :         list_del(&page->lru);
     105       10301 : }
     106             : 
     107             : #define UNSHARED_PTRS_PER_PGD                           \
     108             :         (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
     109             : #define MAX_UNSHARED_PTRS_PER_PGD                       \
     110             :         max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
     111             : 
     112             : 
     113       10325 : static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
     114             : {
     115       10325 :         virt_to_page(pgd)->pt_mm = mm;
     116       10325 : }
     117             : 
     118           0 : struct mm_struct *pgd_page_get_mm(struct page *page)
     119             : {
     120           0 :         return page->pt_mm;
     121             : }
     122             : 
     123       10325 : static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
     124             : {
     125             :         /* If the pgd points to a shared pagetable level (either the
     126             :            ptes in non-PAE, or shared PMD in PAE), then just copy the
     127             :            references from swapper_pg_dir. */
     128       10325 :         if (CONFIG_PGTABLE_LEVELS == 2 ||
     129             :             (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
     130             :             CONFIG_PGTABLE_LEVELS >= 4) {
     131       10325 :                 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
     132             :                                 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
     133             :                                 KERNEL_PGD_PTRS);
     134             :         }
     135             : 
     136             :         /* list required to sync kernel mapping updates */
     137       10325 :         if (!SHARED_KERNEL_PMD) {
     138       10325 :                 pgd_set_mm(pgd, mm);
     139       10325 :                 pgd_list_add(pgd);
     140             :         }
     141       10325 : }
     142             : 
     143       10301 : static void pgd_dtor(pgd_t *pgd)
     144             : {
     145       10301 :         if (SHARED_KERNEL_PMD)
     146             :                 return;
     147             : 
     148       10301 :         spin_lock(&pgd_lock);
     149       10301 :         pgd_list_del(pgd);
     150       10301 :         spin_unlock(&pgd_lock);
     151             : }
     152             : 
     153             : /*
     154             :  * List of all pgd's needed for non-PAE so it can invalidate entries
     155             :  * in both cached and uncached pgd's; not needed for PAE since the
     156             :  * kernel pmd is shared. If PAE were not to share the pmd a similar
     157             :  * tactic would be needed. This is essentially codepath-based locking
     158             :  * against pageattr.c; it is the unique case in which a valid change
     159             :  * of kernel pagetables can't be lazily synchronized by vmalloc faults.
     160             :  * vmalloc faults work because attached pagetables are never freed.
     161             :  * -- nyc
     162             :  */
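
As an illustration of how pgd_list is consumed elsewhere in the kernel (a
sketch in the spirit of sync_global_pgds(), not part of this file;
sync_one_pgd() is a hypothetical helper):

        static void sync_all_pgds(unsigned long address)
        {
                struct page *page;

                spin_lock(&pgd_lock);
                list_for_each_entry(page, &pgd_list, lru) {
                        pgd_t *pgd = (pgd_t *)page_address(page) +
                                     pgd_index(address);

                        /* hypothetical helper: fix up the one entry that
                         * covers 'address' in this cached pgd */
                        sync_one_pgd(pgd, address);
                }
                spin_unlock(&pgd_lock);
        }
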
     163             : 
     164             : #ifdef CONFIG_X86_PAE
     165             : /*
     166             :  * In PAE mode, we need to do a cr3 reload (=tlb flush) when
     167             :  * updating the top-level pagetable entries to guarantee the
     168             :  * processor notices the update.  Since this is expensive, and
     169             :  * all 4 top-level entries are used almost immediately in a
     170             :  * new process's life, we just pre-populate them here.
     171             :  *
     172             :  * Also, if we're in a paravirt environment where the kernel pmd is
      173             :  * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
     174             :  * and initialize the kernel pmds here.
     175             :  */
     176             : #define PREALLOCATED_PMDS       UNSHARED_PTRS_PER_PGD
     177             : #define MAX_PREALLOCATED_PMDS   MAX_UNSHARED_PTRS_PER_PGD
     178             : 
     179             : /*
     180             :  * We allocate separate PMDs for the kernel part of the user page-table
     181             :  * when PTI is enabled. We need them to map the per-process LDT into the
     182             :  * user-space page-table.
     183             :  */
     184             : #define PREALLOCATED_USER_PMDS   (boot_cpu_has(X86_FEATURE_PTI) ? \
     185             :                                         KERNEL_PGD_PTRS : 0)
     186             : #define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
     187             : 
     188             : void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
     189             : {
     190             :         paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
     191             : 
     192             :         /* Note: almost everything apart from _PAGE_PRESENT is
     193             :            reserved at the pmd (PDPT) level. */
     194             :         set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
     195             : 
     196             :         /*
     197             :          * According to Intel App note "TLBs, Paging-Structure Caches,
     198             :          * and Their Invalidation", April 2007, document 317080-001,
     199             :          * section 8.1: in PAE mode we explicitly have to flush the
     200             :          * TLB via cr3 if the top-level pgd is changed...
     201             :          */
     202             :         flush_tlb_mm(mm);
     203             : }
     204             : #else  /* !CONFIG_X86_PAE */
     205             : 
     206             : /* No need to prepopulate any pagetable entries in non-PAE modes. */
     207             : #define PREALLOCATED_PMDS       0
     208             : #define MAX_PREALLOCATED_PMDS   0
     209             : #define PREALLOCATED_USER_PMDS   0
     210             : #define MAX_PREALLOCATED_USER_PMDS 0
     211             : #endif  /* CONFIG_X86_PAE */
     212             : 
     213           0 : static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
     214             : {
     215           0 :         int i;
     216             : 
     217           0 :         for (i = 0; i < count; i++)
     218           0 :                 if (pmds[i]) {
     219           0 :                         pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
     220           0 :                         free_page((unsigned long)pmds[i]);
     221           0 :                         mm_dec_nr_pmds(mm);
     222             :                 }
     223           0 : }
     224             : 
     225       20650 : static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
     226             : {
     227       20650 :         int i;
     228       20650 :         bool failed = false;
     229       20650 :         gfp_t gfp = GFP_PGTABLE_USER;
     230             : 
     231       20650 :         if (mm == &init_mm)
     232           0 :                 gfp &= ~__GFP_ACCOUNT;
     233             : 
     234       20650 :         for (i = 0; i < count; i++) {
     235           0 :                 pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
     236           0 :                 if (!pmd)
     237           0 :                         failed = true;
     238           0 :                 if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
     239           0 :                         free_page((unsigned long)pmd);
     240           0 :                         pmd = NULL;
     241           0 :                         failed = true;
     242             :                 }
     243           0 :                 if (pmd)
     244           0 :                         mm_inc_nr_pmds(mm);
     245           0 :                 pmds[i] = pmd;
     246             :         }
     247             : 
     248       20650 :         if (failed) {
     249           0 :                 free_pmds(mm, pmds, count);
     250           0 :                 return -ENOMEM;
     251             :         }
     252             : 
     253             :         return 0;
     254             : }
     255             : 
     256             : /*
     257             :  * Mop up any pmd pages which may still be attached to the pgd.
     258             :  * Normally they will be freed by munmap/exit_mmap, but any pmd we
     259             :  * preallocate which never got a corresponding vma will need to be
     260             :  * freed manually.
     261             :  */
     262             : static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
     263             : {
     264             :         pgd_t pgd = *pgdp;
     265             : 
     266             :         if (pgd_val(pgd) != 0) {
     267             :                 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
     268             : 
     269             :                 pgd_clear(pgdp);
     270             : 
     271             :                 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
     272             :                 pmd_free(mm, pmd);
     273             :                 mm_dec_nr_pmds(mm);
     274             :         }
     275             : }
     276             : 
     277       10301 : static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
     278             : {
     279       10301 :         int i;
     280             : 
     281       10301 :         for (i = 0; i < PREALLOCATED_PMDS; i++)
     282             :                 mop_up_one_pmd(mm, &pgdp[i]);
     283             : 
     284             : #ifdef CONFIG_PAGE_TABLE_ISOLATION
     285             : 
     286             :         if (!boot_cpu_has(X86_FEATURE_PTI))
     287             :                 return;
     288             : 
     289             :         pgdp = kernel_to_user_pgdp(pgdp);
     290             : 
     291             :         for (i = 0; i < PREALLOCATED_USER_PMDS; i++)
     292             :                 mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]);
     293             : #endif
     294             : }
     295             : 
     296       10325 : static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
     297             : {
     298       10325 :         p4d_t *p4d;
     299       10325 :         pud_t *pud;
     300       10325 :         int i;
     301             : 
     302       10325 :         if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
     303       10325 :                 return;
     304             : 
     305             :         p4d = p4d_offset(pgd, 0);
     306             :         pud = pud_offset(p4d, 0);
     307             : 
     308             :         for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
     309             :                 pmd_t *pmd = pmds[i];
     310             : 
     311             :                 if (i >= KERNEL_PGD_BOUNDARY)
     312             :                         memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
     313             :                                sizeof(pmd_t) * PTRS_PER_PMD);
     314             : 
     315             :                 pud_populate(mm, pud, pmd);
     316             :         }
     317             : }
     318             : 
     319             : #ifdef CONFIG_PAGE_TABLE_ISOLATION
     320             : static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
     321             :                                      pgd_t *k_pgd, pmd_t *pmds[])
     322             : {
     323             :         pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir);
     324             :         pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
     325             :         p4d_t *u_p4d;
     326             :         pud_t *u_pud;
     327             :         int i;
     328             : 
     329             :         u_p4d = p4d_offset(u_pgd, 0);
     330             :         u_pud = pud_offset(u_p4d, 0);
     331             : 
     332             :         s_pgd += KERNEL_PGD_BOUNDARY;
     333             :         u_pud += KERNEL_PGD_BOUNDARY;
     334             : 
     335             :         for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) {
     336             :                 pmd_t *pmd = pmds[i];
     337             : 
     338             :                 memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd),
     339             :                        sizeof(pmd_t) * PTRS_PER_PMD);
     340             : 
     341             :                 pud_populate(mm, u_pud, pmd);
     342             :         }
     343             : 
     344             : }
     345             : #else
     346       10325 : static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
     347             :                                      pgd_t *k_pgd, pmd_t *pmds[])
     348             : {
     349       10325 : }
     350             : #endif
     351             : /*
      352             :  * Xen paravirt assumes that the pgd table occupies one page, and the
      353             :  * 64-bit kernel assumes the same.
      354             :  *
      355             :  * A kernel with PAE paging that is not running as a Xen domain, however,
      356             :  * only needs to allocate 32 bytes for its pgd instead of a whole page.
     357             :  */
     358             : #ifdef CONFIG_X86_PAE
     359             : 
     360             : #include <linux/slab.h>
     361             : 
     362             : #define PGD_SIZE        (PTRS_PER_PGD * sizeof(pgd_t))
     363             : #define PGD_ALIGN       32
     364             : 
     365             : static struct kmem_cache *pgd_cache;
     366             : 
     367             : void __init pgtable_cache_init(void)
     368             : {
     369             :         /*
      370             :          * When a PAE kernel runs as a Xen domain, it does not use a
      371             :          * shared kernel pmd, and that requires a whole page for the pgd.
     372             :          */
     373             :         if (!SHARED_KERNEL_PMD)
     374             :                 return;
     375             : 
     376             :         /*
      377             :          * When a PAE kernel is not running as a Xen domain, it uses a
      378             :          * shared kernel pmd, which does not require a whole page for the
      379             :          * pgd: 32 bytes are enough. So at boot time we create a 32-byte
      380             :          * slab cache for pgd allocations.
     381             :          */
     382             :         pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
     383             :                                       SLAB_PANIC, NULL);
     384             : }
     385             : 
     386             : static inline pgd_t *_pgd_alloc(void)
     387             : {
     388             :         /*
      389             :          * Without SHARED_KERNEL_PMD the PAE kernel is running as a Xen
      390             :          * domain, so allocate a whole page for the pgd.
     391             :          */
     392             :         if (!SHARED_KERNEL_PMD)
     393             :                 return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
     394             :                                                  PGD_ALLOCATION_ORDER);
     395             : 
     396             :         /*
      397             :          * Otherwise the PAE kernel is not running as a Xen domain, so a
      398             :          * 32-byte slab object is enough for the pgd and saves memory.
     399             :          */
     400             :         return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
     401             : }
     402             : 
     403             : static inline void _pgd_free(pgd_t *pgd)
     404             : {
     405             :         if (!SHARED_KERNEL_PMD)
     406             :                 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
     407             :         else
     408             :                 kmem_cache_free(pgd_cache, pgd);
     409             : }
     410             : #else
     411             : 
     412       10325 : static inline pgd_t *_pgd_alloc(void)
     413             : {
     414       20650 :         return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
     415             :                                          PGD_ALLOCATION_ORDER);
     416             : }
     417             : 
     418       10301 : static inline void _pgd_free(pgd_t *pgd)
     419             : {
     420       10301 :         free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
     421           0 : }
     422             : #endif /* CONFIG_X86_PAE */
     423             : 
     424       10325 : pgd_t *pgd_alloc(struct mm_struct *mm)
     425             : {
     426       10325 :         pgd_t *pgd;
     427       10325 :         pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
     428       10325 :         pmd_t *pmds[MAX_PREALLOCATED_PMDS];
     429             : 
     430       10325 :         pgd = _pgd_alloc();
     431             : 
     432       10325 :         if (pgd == NULL)
     433           0 :                 goto out;
     434             : 
     435       10325 :         mm->pgd = pgd;
     436             : 
     437       10325 :         if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
     438           0 :                 goto out_free_pgd;
     439             : 
     440       10325 :         if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
     441           0 :                 goto out_free_pmds;
     442             : 
     443       10325 :         if (paravirt_pgd_alloc(mm) != 0)
     444             :                 goto out_free_user_pmds;
     445             : 
     446             :         /*
     447             :          * Make sure that pre-populating the pmds is atomic with
     448             :          * respect to anything walking the pgd_list, so that they
     449             :          * never see a partially populated pgd.
     450             :          */
     451       10325 :         spin_lock(&pgd_lock);
     452             : 
     453       10325 :         pgd_ctor(mm, pgd);
     454       10325 :         pgd_prepopulate_pmd(mm, pgd, pmds);
     455       10325 :         pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
     456             : 
     457       10325 :         spin_unlock(&pgd_lock);
     458             : 
     459       10325 :         return pgd;
     460             : 
     461             : out_free_user_pmds:
     462             :         free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
     463           0 : out_free_pmds:
     464           0 :         free_pmds(mm, pmds, PREALLOCATED_PMDS);
     465           0 : out_free_pgd:
     466           0 :         _pgd_free(pgd);
     467             : out:
     468             :         return NULL;
     469             : }
     470             : 
     471       10301 : void pgd_free(struct mm_struct *mm, pgd_t *pgd)
     472             : {
     473       10301 :         pgd_mop_up_pmds(mm, pgd);
     474       10301 :         pgd_dtor(pgd);
     475       10301 :         paravirt_pgd_free(mm, pgd);
     476       10301 :         _pgd_free(pgd);
     477       10301 : }
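
For context, this alloc/free pair is driven by the generic mm setup and
teardown code; roughly (a sketch modeled on kernel/fork.c, not part of this
file):

        static inline int mm_alloc_pgd(struct mm_struct *mm)
        {
                mm->pgd = pgd_alloc(mm);
                if (unlikely(!mm->pgd))
                        return -ENOMEM;
                return 0;
        }

        static inline void mm_free_pgd(struct mm_struct *mm)
        {
                pgd_free(mm, mm->pgd);
        }
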
     478             : 
     479             : /*
     480             :  * Used to set accessed or dirty bits in the page table entries
     481             :  * on other architectures. On x86, the accessed and dirty bits
     482             :  * are tracked by hardware. However, do_wp_page calls this function
     483             :  * to also make the pte writeable at the same time the dirty bit is
     484             :  * set. In that case we do actually need to write the PTE.
     485             :  */
     486       32324 : int ptep_set_access_flags(struct vm_area_struct *vma,
     487             :                           unsigned long address, pte_t *ptep,
     488             :                           pte_t entry, int dirty)
     489             : {
     490       32324 :         int changed = !pte_same(*ptep, entry);
     491             : 
     492       32324 :         if (changed && dirty)
     493       32017 :                 set_pte(ptep, entry);
     494             : 
     495       32324 :         return changed;
     496             : }
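
The write-protection path mentioned above uses it roughly like this (a sketch
modeled on the wp_page_reuse() pattern in mm/memory.c, not part of this file;
orig_pte, vma, address and ptep come from the fault handler):

        pte_t entry = pte_mkyoung(orig_pte);

        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
        if (ptep_set_access_flags(vma, address, ptep, entry, 1))
                update_mmu_cache(vma, address, ptep);
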
     497             : 
     498             : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     499           0 : int pmdp_set_access_flags(struct vm_area_struct *vma,
     500             :                           unsigned long address, pmd_t *pmdp,
     501             :                           pmd_t entry, int dirty)
     502             : {
     503           0 :         int changed = !pmd_same(*pmdp, entry);
     504             : 
     505           0 :         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
     506             : 
     507           0 :         if (changed && dirty) {
     508           0 :                 set_pmd(pmdp, entry);
     509             :                 /*
     510             :                  * We had a write-protection fault here and changed the pmd
      511             :                  * to be more permissive. No need to flush the TLB for that,
     512             :                  * #PF is architecturally guaranteed to do that and in the
     513             :                  * worst-case we'll generate a spurious fault.
     514             :                  */
     515             :         }
     516             : 
     517           0 :         return changed;
     518             : }
     519             : 
     520           0 : int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
     521             :                           pud_t *pudp, pud_t entry, int dirty)
     522             : {
     523           0 :         int changed = !pud_same(*pudp, entry);
     524             : 
     525           0 :         VM_BUG_ON(address & ~HPAGE_PUD_MASK);
     526             : 
     527           0 :         if (changed && dirty) {
     528           0 :                 set_pud(pudp, entry);
     529             :                 /*
     530             :                  * We had a write-protection fault here and changed the pud
      531             :                  * to be more permissive. No need to flush the TLB for that,
     532             :                  * #PF is architecturally guaranteed to do that and in the
     533             :                  * worst-case we'll generate a spurious fault.
     534             :                  */
     535             :         }
     536             : 
     537           0 :         return changed;
     538             : }
     539             : #endif
     540             : 
     541           0 : int ptep_test_and_clear_young(struct vm_area_struct *vma,
     542             :                               unsigned long addr, pte_t *ptep)
     543             : {
     544           0 :         int ret = 0;
     545             : 
     546           0 :         if (pte_young(*ptep))
     547           0 :                 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
     548           0 :                                          (unsigned long *) &ptep->pte);
     549             : 
     550           0 :         return ret;
     551             : }
     552             : 
     553             : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     554           0 : int pmdp_test_and_clear_young(struct vm_area_struct *vma,
     555             :                               unsigned long addr, pmd_t *pmdp)
     556             : {
     557           0 :         int ret = 0;
     558             : 
     559           0 :         if (pmd_young(*pmdp))
     560           0 :                 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
     561             :                                          (unsigned long *)pmdp);
     562             : 
     563           0 :         return ret;
     564             : }
     565           0 : int pudp_test_and_clear_young(struct vm_area_struct *vma,
     566             :                               unsigned long addr, pud_t *pudp)
     567             : {
     568           0 :         int ret = 0;
     569             : 
     570           0 :         if (pud_young(*pudp))
     571           0 :                 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
     572             :                                          (unsigned long *)pudp);
     573             : 
     574           0 :         return ret;
     575             : }
     576             : #endif
     577             : 
     578           0 : int ptep_clear_flush_young(struct vm_area_struct *vma,
     579             :                            unsigned long address, pte_t *ptep)
     580             : {
     581             :         /*
     582             :          * On x86 CPUs, clearing the accessed bit without a TLB flush
     583             :          * doesn't cause data corruption. [ It could cause incorrect
     584             :          * page aging and the (mistaken) reclaim of hot pages, but the
     585             :          * chance of that should be relatively low. ]
     586             :          *
     587             :          * So as a performance optimization don't flush the TLB when
     588             :          * clearing the accessed bit, it will eventually be flushed by
     589             :          * a context switch or a VM operation anyway. [ In the rare
     590             :          * event of it not getting flushed for a long time the delay
     591             :          * shouldn't really matter because there's no real memory
     592             :          * pressure for swapout to react to. ]
     593             :          */
     594           0 :         return ptep_test_and_clear_young(vma, address, ptep);
     595             : }
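
Page reclaim's reference tracking is the typical caller; the pattern looks
roughly like this (a sketch in the spirit of mm/rmap.c, with vma, address and
ptep assumed to come from a reverse-map walk):

        int referenced = 0;

        /* count the mapping as recently used and clear the accessed bit */
        if (ptep_clear_flush_young(vma, address, ptep))
                referenced++;
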
     596             : 
     597             : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     598           0 : int pmdp_clear_flush_young(struct vm_area_struct *vma,
     599             :                            unsigned long address, pmd_t *pmdp)
     600             : {
     601           0 :         int young;
     602             : 
     603           0 :         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
     604             : 
     605           0 :         young = pmdp_test_and_clear_young(vma, address, pmdp);
     606           0 :         if (young)
     607           0 :                 flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
     608             : 
     609           0 :         return young;
     610             : }
     611             : #endif
     612             : 
     613             : /**
     614             :  * reserve_top_address - reserves a hole in the top of kernel address space
      615             :  * @reserve: size of hole to reserve
     616             :  *
     617             :  * Can be used to relocate the fixmap area and poke a hole in the top
     618             :  * of kernel address space to make room for a hypervisor.
     619             :  */
     620           0 : void __init reserve_top_address(unsigned long reserve)
     621             : {
     622             : #ifdef CONFIG_X86_32
     623             :         BUG_ON(fixmaps_set > 0);
     624             :         __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
     625             :         printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
     626             :                -reserve, __FIXADDR_TOP + PAGE_SIZE);
     627             : #endif
     628           0 : }
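
A hypothetical early-boot caller (illustrative only; 32-bit hypervisor guests
used this to carve out room below the fixmap):

        /* reserve 16 MiB of virtual address space for a hypervisor hole */
        reserve_top_address(16UL << 20);
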
     629             : 
     630             : int fixmaps_set;
     631             : 
     632           3 : void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
     633             : {
     634           3 :         unsigned long address = __fix_to_virt(idx);
     635             : 
     636             : #ifdef CONFIG_X86_64
     637             :        /*
     638             :         * Ensure that the static initial page tables are covering the
     639             :         * fixmap completely.
     640             :         */
     641           3 :         BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
     642             :                      (FIXMAP_PMD_NUM * PTRS_PER_PTE));
     643             : #endif
     644             : 
     645           3 :         if (idx >= __end_of_fixed_addresses) {
     646           0 :                 BUG();
     647             :                 return;
     648             :         }
     649           3 :         set_pte_vaddr(address, pte);
     650           3 :         fixmaps_set++;
     651             : }
     652             : 
     653           3 : void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
     654             :                        phys_addr_t phys, pgprot_t flags)
     655             : {
     656             :         /* Sanitize 'prot' against any unsupported bits: */
     657           3 :         pgprot_val(flags) &= __default_kernel_pte_mask;
     658             : 
     659           3 :         __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
     660           3 : }
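
Callers normally go through the set_fixmap*() wrappers, which route here via
__set_fixmap(); a rough usage sketch (modeled on the earlycon mapping path;
the slot name assumes CONFIG_FIX_EARLYCON_MEM, and phys is a caller-supplied
physical address):

        void __iomem *base;

        /* map one physical page at a compile-time-fixed virtual slot */
        set_fixmap_io(FIX_EARLYCON_MEM_BASE, phys & PAGE_MASK);
        base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
        base += phys & ~PAGE_MASK;
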
     661             : 
     662             : #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
     663             : #ifdef CONFIG_X86_5LEVEL
     664             : /**
     665             :  * p4d_set_huge - setup kernel P4D mapping
     666             :  *
     667             :  * No 512GB pages yet -- always return 0
     668             :  */
     669             : int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
     670             : {
     671             :         return 0;
     672             : }
     673             : 
     674             : /**
     675             :  * p4d_clear_huge - clear kernel P4D mapping when it is set
     676             :  *
     677             :  * No 512GB pages yet -- always return 0
     678             :  */
     679             : int p4d_clear_huge(p4d_t *p4d)
     680             : {
     681             :         return 0;
     682             : }
     683             : #endif
     684             : 
     685             : /**
     686             :  * pud_set_huge - setup kernel PUD mapping
     687             :  *
     688             :  * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
     689             :  * function sets up a huge page only if any of the following conditions are met:
     690             :  *
     691             :  * - MTRRs are disabled, or
     692             :  *
     693             :  * - MTRRs are enabled and the range is completely covered by a single MTRR, or
     694             :  *
     695             :  * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
     696             :  *   has no effect on the requested PAT memory type.
     697             :  *
     698             :  * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
     699             :  * page mapping attempt fails.
     700             :  *
     701             :  * Returns 1 on success and 0 on failure.
     702             :  */
     703           0 : int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
     704             : {
     705           0 :         u8 mtrr, uniform;
     706             : 
     707           0 :         mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
     708           0 :         if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
     709             :             (mtrr != MTRR_TYPE_WRBACK))
     710             :                 return 0;
     711             : 
      712             :         /* Bail out if we are on a populated non-leaf entry: */
     713           0 :         if (pud_present(*pud) && !pud_huge(*pud))
     714           0 :                 return 0;
     715             : 
     716           0 :         set_pte((pte_t *)pud, pfn_pte(
     717             :                 (u64)addr >> PAGE_SHIFT,
     718             :                 __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE)));
     719             : 
     720           0 :         return 1;
     721             : }
     722             : 
     723             : /**
     724             :  * pmd_set_huge - setup kernel PMD mapping
     725             :  *
     726             :  * See text over pud_set_huge() above.
     727             :  *
     728             :  * Returns 1 on success and 0 on failure.
     729             :  */
     730          68 : int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
     731             : {
     732          68 :         u8 mtrr, uniform;
     733             : 
     734          68 :         mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
     735          68 :         if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
     736             :             (mtrr != MTRR_TYPE_WRBACK)) {
     737           0 :                 pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
     738             :                              __func__, addr, addr + PMD_SIZE);
     739           0 :                 return 0;
     740             :         }
     741             : 
      742             :         /* Bail out if we are on a populated non-leaf entry: */
     743         136 :         if (pmd_present(*pmd) && !pmd_huge(*pmd))
     744           0 :                 return 0;
     745             : 
     746          68 :         set_pte((pte_t *)pmd, pfn_pte(
     747             :                 (u64)addr >> PAGE_SHIFT,
     748             :                 __pgprot(protval_4k_2_large(pgprot_val(prot)) | _PAGE_PSE)));
     749             : 
     750          68 :         return 1;
     751             : }
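
The decrease-page-size fallback asked for in the kernel-doc above looks
roughly like this in a caller (a simplified sketch of the vmap/ioremap
pattern, not part of this file; size, pud, pmd, phys and prot come from the
caller's mapping loop):

        /* try the largest mapping first, then fall back */
        if (size >= PUD_SIZE && pud_set_huge(pud, phys, prot))
                return 0;               /* mapped with a 1GB page */
        if (size >= PMD_SIZE && pmd_set_huge(pmd, phys, prot))
                return 0;               /* mapped with a 2MB page */
        /* otherwise map the range with 4K ptes */
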
     752             : 
     753             : /**
     754             :  * pud_clear_huge - clear kernel PUD mapping when it is set
     755             :  *
     756             :  * Returns 1 on success and 0 on failure (no PUD map is found).
     757             :  */
     758       36352 : int pud_clear_huge(pud_t *pud)
     759             : {
     760       36352 :         if (pud_large(*pud)) {
     761           0 :                 pud_clear(pud);
     762           0 :                 return 1;
     763             :         }
     764             : 
     765             :         return 0;
     766             : }
     767             : 
     768             : /**
     769             :  * pmd_clear_huge - clear kernel PMD mapping when it is set
     770             :  *
     771             :  * Returns 1 on success and 0 on failure (no PMD map is found).
     772             :  */
     773       36414 : int pmd_clear_huge(pmd_t *pmd)
     774             : {
     775       72828 :         if (pmd_large(*pmd)) {
     776           0 :                 pmd_clear(pmd);
     777           0 :                 return 1;
     778             :         }
     779             : 
     780             :         return 0;
     781             : }
     782             : 
     783             : /*
     784             :  * Until we support 512GB pages, skip them in the vmap area.
     785             :  */
     786           0 : int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
     787             : {
     788           0 :         return 0;
     789             : }
     790             : 
     791             : #ifdef CONFIG_X86_64
     792             : /**
     793             :  * pud_free_pmd_page - Clear pud entry and free pmd page.
     794             :  * @pud: Pointer to a PUD.
     795             :  * @addr: Virtual address associated with pud.
     796             :  *
     797             :  * Context: The pud range has been unmapped and TLB purged.
     798             :  * Return: 1 if clearing the entry succeeded. 0 otherwise.
     799             :  *
     800             :  * NOTE: Callers must allow a single page allocation.
     801             :  */
     802           0 : int pud_free_pmd_page(pud_t *pud, unsigned long addr)
     803             : {
     804           0 :         pmd_t *pmd, *pmd_sv;
     805           0 :         pte_t *pte;
     806           0 :         int i;
     807             : 
     808           0 :         pmd = (pmd_t *)pud_page_vaddr(*pud);
     809           0 :         pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
     810           0 :         if (!pmd_sv)
     811             :                 return 0;
     812             : 
     813           0 :         for (i = 0; i < PTRS_PER_PMD; i++) {
     814           0 :                 pmd_sv[i] = pmd[i];
     815           0 :                 if (!pmd_none(pmd[i]))
     816           0 :                         pmd_clear(&pmd[i]);
     817             :         }
     818             : 
     819           0 :         pud_clear(pud);
     820             : 
     821             :         /* INVLPG to clear all paging-structure caches */
     822           0 :         flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
     823             : 
     824           0 :         for (i = 0; i < PTRS_PER_PMD; i++) {
     825           0 :                 if (!pmd_none(pmd_sv[i])) {
     826           0 :                         pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
     827           0 :                         free_page((unsigned long)pte);
     828             :                 }
     829             :         }
     830             : 
     831           0 :         free_page((unsigned long)pmd_sv);
     832             : 
     833           0 :         pgtable_pmd_page_dtor(virt_to_page(pmd));
     834           0 :         free_page((unsigned long)pmd);
     835             : 
     836           0 :         return 1;
     837             : }
     838             : 
     839             : /**
     840             :  * pmd_free_pte_page - Clear pmd entry and free pte page.
     841             :  * @pmd: Pointer to a PMD.
     842             :  * @addr: Virtual address associated with pmd.
     843             :  *
     844             :  * Context: The pmd range has been unmapped and TLB purged.
     845             :  * Return: 1 if clearing the entry succeeded. 0 otherwise.
     846             :  */
     847           0 : int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
     848             : {
     849           0 :         pte_t *pte;
     850             : 
     851           0 :         pte = (pte_t *)pmd_page_vaddr(*pmd);
     852           0 :         pmd_clear(pmd);
     853             : 
     854             :         /* INVLPG to clear all paging-structure caches */
     855           0 :         flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
     856             : 
     857           0 :         free_page((unsigned long)pte);
     858             : 
     859           0 :         return 1;
     860             : }
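
Before installing a huge pmd over a range that was previously mapped with 4K
ptes, callers pair this with pmd_set_huge(); roughly (a sketch modeled on the
generic vmap huge-mapping code, not part of this file; pmd, addr, phys_addr
and prot come from the caller):

        /* the range has already been unmapped and the TLB purged */
        if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
                return 0;               /* keep using 4K ptes */

        return pmd_set_huge(pmd, phys_addr, prot);
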
     861             : 
     862             : #else /* !CONFIG_X86_64 */
     863             : 
     864             : int pud_free_pmd_page(pud_t *pud, unsigned long addr)
     865             : {
     866             :         return pud_none(*pud);
     867             : }
     868             : 
     869             : /*
      870             :  * Disable free page handling on x86-PAE. This ensures that ioremap()
     871             :  * does not update sync'd pmd entries. See vmalloc_sync_one().
     872             :  */
     873             : int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
     874             : {
     875             :         return pmd_none(*pmd);
     876             : }
     877             : 
     878             : #endif /* CONFIG_X86_64 */
     879             : #endif  /* CONFIG_HAVE_ARCH_HUGE_VMAP */

Generated by: LCOV version 1.14