LCOV - code coverage report
Current view: top level - arch/x86/mm/pat - set_memory.c
Test: landlock.info
Date: 2021-04-22 12:43:58

                 Hit    Total    Coverage
Lines:           353      818      43.2 %
Functions:        30       83      36.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * Copyright 2002 Andi Kleen, SuSE Labs.
       4             :  * Thanks to Ben LaHaise for precious feedback.
       5             :  */
       6             : #include <linux/highmem.h>
       7             : #include <linux/memblock.h>
       8             : #include <linux/sched.h>
       9             : #include <linux/mm.h>
      10             : #include <linux/interrupt.h>
      11             : #include <linux/seq_file.h>
      12             : #include <linux/debugfs.h>
      13             : #include <linux/pfn.h>
      14             : #include <linux/percpu.h>
      15             : #include <linux/gfp.h>
      16             : #include <linux/pci.h>
      17             : #include <linux/vmalloc.h>
      18             : #include <linux/libnvdimm.h>
      19             : 
      20             : #include <asm/e820/api.h>
      21             : #include <asm/processor.h>
      22             : #include <asm/tlbflush.h>
      23             : #include <asm/sections.h>
      24             : #include <asm/setup.h>
      25             : #include <linux/uaccess.h>
      26             : #include <asm/pgalloc.h>
      27             : #include <asm/proto.h>
      28             : #include <asm/memtype.h>
      29             : #include <asm/set_memory.h>
      30             : 
      31             : #include "../mm_internal.h"
      32             : 
      33             : /*
      34             :  * The current flushing context - we pass it instead of 5 arguments:
      35             :  */
      36             : struct cpa_data {
      37             :         unsigned long   *vaddr;
      38             :         pgd_t           *pgd;
      39             :         pgprot_t        mask_set;
      40             :         pgprot_t        mask_clr;
      41             :         unsigned long   numpages;
      42             :         unsigned long   curpage;
      43             :         unsigned long   pfn;
      44             :         unsigned int    flags;
      45             :         unsigned int    force_split             : 1,
      46             :                         force_static_prot       : 1,
      47             :                         force_flush_all         : 1;
      48             :         struct page     **pages;
      49             : };
      50             : 
      51             : enum cpa_warn {
      52             :         CPA_CONFLICT,
      53             :         CPA_PROTECT,
      54             :         CPA_DETECT,
      55             : };
      56             : 
      57             : static const int cpa_warn_level = CPA_PROTECT;
      58             : 
      59             : /*
      60             :  * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
      61             :  * using cpa_lock, so that no other CPU with stale large TLB entries can
      62             :  * change the page attribute in parallel while some other CPU is splitting
      63             :  * a large page entry and changing the attribute.
      64             :  */
      65             : static DEFINE_SPINLOCK(cpa_lock);
      66             : 
      67             : #define CPA_FLUSHTLB 1
      68             : #define CPA_ARRAY 2
      69             : #define CPA_PAGES_ARRAY 4
      70             : #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
      71             : 
      72           0 : static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
      73             : {
      74           0 :         return __pgprot(cachemode2protval(pcm));
      75             : }
      76             : 
      77             : #ifdef CONFIG_PROC_FS
      78             : static unsigned long direct_pages_count[PG_LEVEL_NUM];
      79             : 
      80          12 : void update_page_count(int level, unsigned long pages)
      81             : {
      82             :         /* Protect against CPA */
      83          12 :         spin_lock(&pgd_lock);
      84          12 :         direct_pages_count[level] += pages;
      85          12 :         spin_unlock(&pgd_lock);
      86          12 : }
      87             : 
      88          14 : static void split_page_count(int level)
      89             : {
      90          14 :         if (direct_pages_count[level] == 0)
      91             :                 return;
      92             : 
      93          14 :         direct_pages_count[level]--;
      94          14 :         direct_pages_count[level - 1] += PTRS_PER_PTE;
      95             : }
      96             : 
      97           1 : void arch_report_meminfo(struct seq_file *m)
      98             : {
      99           1 :         seq_printf(m, "DirectMap4k:    %8lu kB\n",
     100           1 :                         direct_pages_count[PG_LEVEL_4K] << 2);
     101             : #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
     102           1 :         seq_printf(m, "DirectMap2M:    %8lu kB\n",
     103           1 :                         direct_pages_count[PG_LEVEL_2M] << 11);
     104             : #else
     105             :         seq_printf(m, "DirectMap4M:    %8lu kB\n",
     106             :                         direct_pages_count[PG_LEVEL_2M] << 12);
     107             : #endif
     108           1 :         if (direct_gbpages)
     109           1 :                 seq_printf(m, "DirectMap1G:    %8lu kB\n",
     110           1 :                         direct_pages_count[PG_LEVEL_1G] << 20);
     111           1 : }
     112             : #else
     113             : static inline void split_page_count(int level) { }
     114             : #endif
     115             : 
     116             : #ifdef CONFIG_X86_CPA_STATISTICS
     117             : 
     118             : static unsigned long cpa_1g_checked;
     119             : static unsigned long cpa_1g_sameprot;
     120             : static unsigned long cpa_1g_preserved;
     121             : static unsigned long cpa_2m_checked;
     122             : static unsigned long cpa_2m_sameprot;
     123             : static unsigned long cpa_2m_preserved;
     124             : static unsigned long cpa_4k_install;
     125             : 
     126             : static inline void cpa_inc_1g_checked(void)
     127             : {
     128             :         cpa_1g_checked++;
     129             : }
     130             : 
     131             : static inline void cpa_inc_2m_checked(void)
     132             : {
     133             :         cpa_2m_checked++;
     134             : }
     135             : 
     136             : static inline void cpa_inc_4k_install(void)
     137             : {
     138             :         data_race(cpa_4k_install++);
     139             : }
     140             : 
     141             : static inline void cpa_inc_lp_sameprot(int level)
     142             : {
     143             :         if (level == PG_LEVEL_1G)
     144             :                 cpa_1g_sameprot++;
     145             :         else
     146             :                 cpa_2m_sameprot++;
     147             : }
     148             : 
     149             : static inline void cpa_inc_lp_preserved(int level)
     150             : {
     151             :         if (level == PG_LEVEL_1G)
     152             :                 cpa_1g_preserved++;
     153             :         else
     154             :                 cpa_2m_preserved++;
     155             : }
     156             : 
     157             : static int cpastats_show(struct seq_file *m, void *p)
     158             : {
     159             :         seq_printf(m, "1G pages checked:     %16lu\n", cpa_1g_checked);
     160             :         seq_printf(m, "1G pages sameprot:    %16lu\n", cpa_1g_sameprot);
     161             :         seq_printf(m, "1G pages preserved:   %16lu\n", cpa_1g_preserved);
     162             :         seq_printf(m, "2M pages checked:     %16lu\n", cpa_2m_checked);
     163             :         seq_printf(m, "2M pages sameprot:    %16lu\n", cpa_2m_sameprot);
     164             :         seq_printf(m, "2M pages preserved:   %16lu\n", cpa_2m_preserved);
     165             :         seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install);
     166             :         return 0;
     167             : }
     168             : 
     169             : static int cpastats_open(struct inode *inode, struct file *file)
     170             : {
     171             :         return single_open(file, cpastats_show, NULL);
     172             : }
     173             : 
     174             : static const struct file_operations cpastats_fops = {
     175             :         .open           = cpastats_open,
     176             :         .read           = seq_read,
     177             :         .llseek         = seq_lseek,
     178             :         .release        = single_release,
     179             : };
     180             : 
     181             : static int __init cpa_stats_init(void)
     182             : {
     183             :         debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL,
     184             :                             &cpastats_fops);
     185             :         return 0;
     186             : }
     187             : late_initcall(cpa_stats_init);
     188             : #else
     189           0 : static inline void cpa_inc_1g_checked(void) { }
     190         153 : static inline void cpa_inc_2m_checked(void) { }
     191        4234 : static inline void cpa_inc_4k_install(void) { }
     192             : static inline void cpa_inc_lp_sameprot(int level) { }
     193          50 : static inline void cpa_inc_lp_preserved(int level) { }
     194             : #endif
     195             : 
     196             : 
     197             : static inline int
     198        1976 : within(unsigned long addr, unsigned long start, unsigned long end)
     199             : {
     200        1976 :         return addr >= start && addr < end;
     201             : }
     202             : 
     203             : static inline int
     204           9 : within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
     205             : {
     206           9 :         return addr >= start && addr <= end;
     207             : }
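
The two helpers above differ only in whether the end bound is exclusive or
inclusive. A standalone sketch of the boundary behaviour (user-space
re-implementation for illustration):

    #include <assert.h>

    static int within(unsigned long addr, unsigned long start, unsigned long end)
    {
            return addr >= start && addr < end;
    }

    static int within_inclusive(unsigned long addr, unsigned long start,
                                unsigned long end)
    {
            return addr >= start && addr <= end;
    }

    int main(void)
    {
            assert(!within(0x1000, 0x0, 0x1000));          /* end is exclusive */
            assert(within_inclusive(0x1000, 0x0, 0x1000)); /* end is inclusive */
            return 0;
    }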
     208             : 
     209             : #ifdef CONFIG_X86_64
     210             : 
     211           9 : static inline unsigned long highmap_start_pfn(void)
     212             : {
     213           9 :         return __pa_symbol(_text) >> PAGE_SHIFT;
     214             : }
     215             : 
     216           9 : static inline unsigned long highmap_end_pfn(void)
     217             : {
      218             :         /* Do not reference a physical address outside the kernel. */
     219           9 :         return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
     220             : }
     221             : 
     222           9 : static bool __cpa_pfn_in_highmap(unsigned long pfn)
     223             : {
     224             :         /*
     225             :          * Kernel text has an alias mapping at a high address, known
     226             :          * here as "highmap".
     227             :          */
     228           9 :         return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
     229             : }
     230             : 
     231             : #else
     232             : 
     233             : static bool __cpa_pfn_in_highmap(unsigned long pfn)
     234             : {
     235             :         /* There is no highmap on 32-bit */
     236             :         return false;
     237             : }
     238             : 
     239             : #endif
     240             : 
     241             : /*
     242             :  * See set_mce_nospec().
     243             :  *
      244             :  * Machine check recovery code needs to change the cache mode of poisoned pages to
     245             :  * UC to avoid speculative access logging another error. But passing the
     246             :  * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
     247             :  * speculative access. So we cheat and flip the top bit of the address. This
     248             :  * works fine for the code that updates the page tables. But at the end of the
     249             :  * process we need to flush the TLB and cache and the non-canonical address
     250             :  * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
     251             :  *
     252             :  * But in the common case we already have a canonical address. This code
     253             :  * will fix the top bit if needed and is a no-op otherwise.
     254             :  */
     255           7 : static inline unsigned long fix_addr(unsigned long addr)
     256             : {
     257             : #ifdef CONFIG_X86_64
     258           7 :         return (long)(addr << 1) >> 1;
     259             : #else
     260             :         return addr;
     261             : #endif
     262             : }
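
The shift pair works because an arithmetic right shift copies bit 62 down
over bit 63, so a 1:1-mapping address whose top bit was flipped by
set_mce_nospec() comes back canonical, while an already-canonical address is
unchanged. A minimal standalone sketch (assumes an arithmetic right shift on
signed values, as the kernel does; the sample address is hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t fix_addr(uint64_t addr)
    {
            /* Drop the (possibly flipped) top bit, then sign-extend bit 62
             * back over it. */
            return (uint64_t)(((int64_t)(addr << 1)) >> 1);
    }

    int main(void)
    {
            uint64_t canonical = 0xffff888000001000ULL;    /* direct-map style */
            uint64_t flipped   = canonical ^ (1ULL << 63); /* non-canonical */

            printf("%llx\n", (unsigned long long)fix_addr(flipped));   /* ffff888000001000 */
            printf("%llx\n", (unsigned long long)fix_addr(canonical)); /* unchanged */
            return 0;
    }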
     263             : 
     264        5368 : static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
     265             : {
     266        5368 :         if (cpa->flags & CPA_PAGES_ARRAY) {
     267           0 :                 struct page *page = cpa->pages[idx];
     268             : 
     269           0 :                 if (unlikely(PageHighMem(page)))
     270             :                         return 0;
     271             : 
     272           0 :                 return (unsigned long)page_address(page);
     273             :         }
     274             : 
     275        5368 :         if (cpa->flags & CPA_ARRAY)
     276           0 :                 return cpa->vaddr[idx];
     277             : 
     278        5368 :         return *cpa->vaddr + idx * PAGE_SIZE;
     279             : }
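
__cpa_addr() hides three request encodings behind one index: an array of
struct page pointers (CPA_PAGES_ARRAY), an array of virtual addresses
(CPA_ARRAY), or a contiguous range starting at *vaddr. A hedged user-space
sketch of the same dispatch (names and the 4 KiB page size are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    enum req_mode { REQ_RANGE, REQ_ARRAY };

    /* Return the i-th address of a request: either base + i pages for a
     * contiguous range, or the i-th element of an explicit address array.
     * The kernel's CPA_PAGES_ARRAY case is analogous, but resolves struct
     * page pointers via page_address() first. */
    static uintptr_t req_addr(enum req_mode mode, uintptr_t base,
                              const uintptr_t *array, unsigned long i)
    {
            if (mode == REQ_ARRAY)
                    return array[i];
            return base + i * 4096;
    }

    int main(void)
    {
            uintptr_t addrs[] = { 0x1000, 0x5000 };

            return (req_addr(REQ_RANGE, 0x1000, NULL, 1) == 0x2000 &&
                    req_addr(REQ_ARRAY, 0, addrs, 1) == 0x5000) ? 0 : 1;
    }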
     280             : 
     281             : /*
     282             :  * Flushing functions
     283             :  */
     284             : 
     285           0 : static void clflush_cache_range_opt(void *vaddr, unsigned int size)
     286             : {
     287           0 :         const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
     288           0 :         void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
     289           0 :         void *vend = vaddr + size;
     290             : 
     291           0 :         if (p >= vend)
     292             :                 return;
     293             : 
     294           0 :         for (; p < vend; p += clflush_size)
     295           0 :                 clflushopt(p);
     296             : }
     297             : 
     298             : /**
     299             :  * clflush_cache_range - flush a cache range with clflush
     300             :  * @vaddr:      virtual start address
     301             :  * @size:       number of bytes to flush
     302             :  *
     303             :  * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
     304             :  * SFENCE to avoid ordering issues.
     305             :  */
     306           0 : void clflush_cache_range(void *vaddr, unsigned int size)
     307             : {
     308           0 :         mb();
     309           0 :         clflush_cache_range_opt(vaddr, size);
     310           0 :         mb();
     311           0 : }
     312             : EXPORT_SYMBOL_GPL(clflush_cache_range);
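
The same align-and-step pattern is usable from user space. A hedged sketch
with compiler intrinsics (assumes a 64-byte cache line, where the kernel
reads boot_cpu_data, and requires building with -mclflushopt):

    #include <immintrin.h>
    #include <stdint.h>

    #define CACHELINE 64 /* assumption; not queried from CPUID here */

    /* Flush [vaddr, vaddr + size): round down to a cache-line boundary,
     * step one line at a time, and fence on both sides because CLFLUSHOPT
     * is unordered, as the kernel-doc comment above notes. */
    static void flush_range(void *vaddr, unsigned int size)
    {
            char *p = (char *)((uintptr_t)vaddr & ~(uintptr_t)(CACHELINE - 1));
            char *vend = (char *)vaddr + size;

            _mm_mfence();
            for (; p < vend; p += CACHELINE)
                    _mm_clflushopt(p);
            _mm_mfence();
    }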
     313             : 
     314             : #ifdef CONFIG_ARCH_HAS_PMEM_API
     315           0 : void arch_invalidate_pmem(void *addr, size_t size)
     316             : {
     317           0 :         clflush_cache_range(addr, size);
     318           0 : }
     319             : EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
     320             : #endif
     321             : 
     322           0 : static void __cpa_flush_all(void *arg)
     323             : {
     324           0 :         unsigned long cache = (unsigned long)arg;
     325             : 
     326             :         /*
      327             :          * Flush all to work around errata in early Athlons regarding
     328             :          * large page flushing.
     329             :          */
     330           0 :         __flush_tlb_all();
     331             : 
     332           0 :         if (cache && boot_cpu_data.x86 >= 4)
     333           0 :                 wbinvd();
     334           0 : }
     335             : 
     336           0 : static void cpa_flush_all(unsigned long cache)
     337             : {
     338           0 :         BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
     339             : 
     340           0 :         on_each_cpu(__cpa_flush_all, (void *) cache, 1);
     341           0 : }
     342             : 
     343           3 : static void __cpa_flush_tlb(void *data)
     344             : {
     345           3 :         struct cpa_data *cpa = data;
     346           3 :         unsigned int i;
     347             : 
     348          10 :         for (i = 0; i < cpa->numpages; i++)
     349           7 :                 flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
     350           3 : }
     351             : 
     352          16 : static void cpa_flush(struct cpa_data *data, int cache)
     353             : {
     354          16 :         struct cpa_data *cpa = data;
     355          16 :         unsigned int i;
     356             : 
     357          16 :         BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
     358             : 
     359          16 :         if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
     360           0 :                 cpa_flush_all(cache);
     361           0 :                 return;
     362             :         }
     363             : 
     364          16 :         if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
     365          13 :                 flush_tlb_all();
     366             :         else
     367           3 :                 on_each_cpu(__cpa_flush_tlb, cpa, 1);
     368             : 
     369          16 :         if (!cache)
     370             :                 return;
     371             : 
     372           0 :         mb();
     373           0 :         for (i = 0; i < cpa->numpages; i++) {
     374           0 :                 unsigned long addr = __cpa_addr(cpa, i);
     375           0 :                 unsigned int level;
     376             : 
     377           0 :                 pte_t *pte = lookup_address(addr, &level);
     378             : 
     379             :                 /*
     380             :                  * Only flush present addresses:
     381             :                  */
     382           0 :                 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
     383           0 :                         clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
     384             :         }
     385           0 :         mb();
     386             : }
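
cpa_flush() trades a global TLB flush against per-page flushes: past a cost
ceiling (tlb_single_page_flush_ceiling), one global flush is cheaper than
many single-page invalidations. A toy sketch of that threshold choice (the
ceiling value and the stub flush functions are illustrative):

    #include <stdio.h>

    #define FLUSH_CEILING 33 /* illustrative; the kernel value is tunable */

    static void flush_one(unsigned long addr) { printf("invlpg %#lx\n", addr); }
    static void flush_all(void)               { printf("flush all\n"); }

    static void flush_pages(unsigned long start, unsigned long npages,
                            int force_all)
    {
            if (force_all || npages > FLUSH_CEILING) {
                    flush_all();
                    return;
            }
            for (unsigned long i = 0; i < npages; i++)
                    flush_one(start + i * 4096);
    }

    int main(void)
    {
            flush_pages(0x10000, 3, 0);   /* three single-page flushes */
            flush_pages(0x10000, 100, 0); /* one global flush */
            return 0;
    }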
     387             : 
     388       11153 : static bool overlaps(unsigned long r1_start, unsigned long r1_end,
     389             :                      unsigned long r2_start, unsigned long r2_end)
     390             : {
     391       11153 :         return (r1_start <= r2_end && r1_end >= r2_start) ||
     392             :                 (r2_start <= r1_end && r2_end >= r1_start);
     393             : }
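
Note that the two disjuncts in overlaps() are the same test with the ranges
swapped, so a single clause already decides inclusive-range overlap. A
standalone check of the boundary cases:

    #include <assert.h>
    #include <stdbool.h>

    /* Inclusive-range overlap: true exactly when neither range ends
     * before the other begins. */
    static bool ranges_overlap(unsigned long s1, unsigned long e1,
                               unsigned long s2, unsigned long e2)
    {
            return s1 <= e2 && e1 >= s2;
    }

    int main(void)
    {
            assert(ranges_overlap(0, 10, 10, 20)); /* touching endpoints overlap */
            assert(!ranges_overlap(0, 9, 10, 20)); /* adjacent but disjoint */
            return 0;
    }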
     394             : 
     395             : #ifdef CONFIG_PCI_BIOS
     396             : /*
     397             :  * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
     398             :  * based config access (CONFIG_PCI_GOBIOS) support.
     399             :  */
     400             : #define BIOS_PFN        PFN_DOWN(BIOS_BEGIN)
     401             : #define BIOS_PFN_END    PFN_DOWN(BIOS_END - 1)
     402             : 
     403             : static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
     404             : {
     405             :         if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
     406             :                 return _PAGE_NX;
     407             :         return 0;
     408             : }
     409             : #else
     410        4437 : static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
     411             : {
     412        4437 :         return 0;
     413             : }
     414             : #endif
     415             : 
     416             : /*
     417             :  * The .rodata section needs to be read-only. Using the pfn catches all
     418             :  * aliases.  This also includes __ro_after_init, so do not enforce until
     419             :  * kernel_set_to_readonly is true.
     420             :  */
     421        4437 : static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
     422             : {
     423        4437 :         unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata));
     424             : 
     425             :         /*
      426             :          * Note: __end_rodata is page aligned and not inclusive, so
     427             :          * subtract 1 to get the last enforced PFN in the rodata area.
     428             :          */
     429        4437 :         epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1;
     430             : 
     431        4437 :         if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro))
     432           9 :                 return _PAGE_RW;
     433             :         return 0;
     434             : }
     435             : 
     436             : /*
      437             :  * Protect kernel text against becoming non-executable by forbidding
     438             :  * _PAGE_NX.  This protects only the high kernel mapping (_text -> _etext)
     439             :  * out of which the kernel actually executes.  Do not protect the low
     440             :  * mapping.
     441             :  *
     442             :  * This does not cover __inittext since that is gone after boot.
     443             :  */
     444        4437 : static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
     445             : {
     446        4437 :         unsigned long t_end = (unsigned long)_etext - 1;
     447        4437 :         unsigned long t_start = (unsigned long)_text;
     448             : 
     449        4437 :         if (overlaps(start, end, t_start, t_end))
     450          23 :                 return _PAGE_NX;
     451             :         return 0;
     452             : }
     453             : 
     454             : #if defined(CONFIG_X86_64)
     455             : /*
     456             :  * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
      457             :  * the kernel text mappings for the large-page-aligned text and rodata
      458             :  * sections will always be read-only. The kernel identity mappings covering
      459             :  * the holes caused by this alignment can be anything the user asks for.
     460             :  *
     461             :  * This will preserve the large page mappings for kernel text/data at no
     462             :  * extra cost.
     463             :  */
     464        4234 : static pgprotval_t protect_kernel_text_ro(unsigned long start,
     465             :                                           unsigned long end)
     466             : {
     467        4234 :         unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
     468        4234 :         unsigned long t_start = (unsigned long)_text;
     469        4234 :         unsigned int level;
     470             : 
     471        4234 :         if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end))
     472             :                 return 0;
     473             :         /*
      474             :          * Don't enforce the !RW mapping for the kernel text mapping if
      475             :          * the current mapping already uses small pages.  No
     476             :          * need to work hard to preserve large page mappings in this case.
     477             :          *
     478             :          * This also fixes the Linux Xen paravirt guest boot failure caused
     479             :          * by unexpected read-only mappings for kernel identity
     480             :          * mappings. In this paravirt guest case, the kernel text mapping
     481             :          * and the kernel identity mapping share the same page-table pages,
     482             :          * so the protections for kernel text and identity mappings have to
     483             :          * be the same.
     484             :          */
     485        1655 :         if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
     486           0 :                 return _PAGE_RW;
     487             :         return 0;
     488             : }
     489             : #else
     490             : static pgprotval_t protect_kernel_text_ro(unsigned long start,
     491             :                                           unsigned long end)
     492             : {
     493             :         return 0;
     494             : }
     495             : #endif
     496             : 
     497       17395 : static inline bool conflicts(pgprot_t prot, pgprotval_t val)
     498             : {
     499       17395 :         return (pgprot_val(prot) & ~val) != pgprot_val(prot);
     500             : }
     501             : 
     502       17545 : static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
     503             :                                   unsigned long start, unsigned long end,
     504             :                                   unsigned long pfn, const char *txt)
     505             : {
     506       17545 :         static const char *lvltxt[] = {
     507             :                 [CPA_CONFLICT]  = "conflict",
     508             :                 [CPA_PROTECT]   = "protect",
     509             :                 [CPA_DETECT]    = "detect",
     510             :         };
     511             : 
     512       17545 :         if (warnlvl > cpa_warn_level || !conflicts(prot, val))
     513             :                 return;
     514             : 
     515           0 :         pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
     516             :                 lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
     517             :                 (unsigned long long)val);
     518             : }
     519             : 
     520             : /*
     521             :  * Certain areas of memory on x86 require very specific protection flags,
     522             :  * for example the BIOS area or kernel text. Callers don't always get this
     523             :  * right (again, ioremap() on BIOS memory is not uncommon) so this function
     524             :  * checks and fixes these known static required protection bits.
     525             :  */
     526        4437 : static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
     527             :                                           unsigned long pfn, unsigned long npg,
     528             :                                           unsigned long lpsize, int warnlvl)
     529             : {
     530        4437 :         pgprotval_t forbidden, res;
     531        4437 :         unsigned long end;
     532             : 
     533             :         /*
     534             :          * There is no point in checking RW/NX conflicts when the requested
     535             :          * mapping is setting the page !PRESENT.
     536             :          */
     537        4437 :         if (!(pgprot_val(prot) & _PAGE_PRESENT))
     538           0 :                 return prot;
     539             : 
     540             :         /* Operate on the virtual address */
     541        4437 :         end = start + npg * PAGE_SIZE - 1;
     542             : 
     543        4437 :         res = protect_kernel_text(start, end);
     544        4437 :         check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
     545        4437 :         forbidden = res;
     546             : 
     547             :         /*
      548             :          * Special case to preserve a large page. If the change spans the
      549             :          * full large page mapping then there is no point in splitting it
      550             :          * up. Happens with ftrace and is going to be removed once ftrace
      551             :          * has switched to text_poke().
     552             :          */
     553        4437 :         if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
     554        4234 :                 res = protect_kernel_text_ro(start, end);
     555        4234 :                 check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
     556        4234 :                 forbidden |= res;
     557             :         }
     558             : 
     559             :         /* Check the PFN directly */
     560        4437 :         res = protect_pci_bios(pfn, pfn + npg - 1);
     561        4437 :         check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
     562        4437 :         forbidden |= res;
     563             : 
     564        4437 :         res = protect_rodata(pfn, pfn + npg - 1);
     565        4437 :         check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
     566        4437 :         forbidden |= res;
     567             : 
     568        4437 :         return __pgprot(pgprot_val(prot) & ~forbidden);
     569             : }
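
The function runs each checker, ORs the returned forbidden bits together,
and finally clears them from the requested pgprot. A minimal sketch of that
accumulate-and-mask pattern (the bit values are illustrative, not the
kernel's _PAGE_* definitions):

    #include <stdio.h>

    #define MY_PRESENT 0x1UL
    #define MY_RW      0x2UL

    int main(void)
    {
            unsigned long prot = MY_PRESENT | MY_RW; /* requested protection */
            unsigned long forbidden = 0;

            /* Each checker returns the bits it vetoes for this range. */
            forbidden |= MY_RW;     /* e.g. the range overlaps .rodata */

            prot &= ~forbidden;     /* drop every vetoed bit */
            printf("%#lx\n", prot); /* 0x1: RW was stripped, PRESENT kept */
            return 0;
    }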
     570             : 
     571             : /*
     572             :  * Lookup the page table entry for a virtual address in a specific pgd.
     573             :  * Return a pointer to the entry and the level of the mapping.
     574             :  */
     575        6226 : pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
     576             :                              unsigned int *level)
     577             : {
     578        6226 :         p4d_t *p4d;
     579        6226 :         pud_t *pud;
     580        6226 :         pmd_t *pmd;
     581             : 
     582        6226 :         *level = PG_LEVEL_NONE;
     583             : 
     584        6226 :         if (pgd_none(*pgd))
     585             :                 return NULL;
     586             : 
     587        6226 :         p4d = p4d_offset(pgd, address);
     588        6226 :         if (p4d_none(*p4d))
     589             :                 return NULL;
     590             : 
     591        6226 :         *level = PG_LEVEL_512G;
     592        6226 :         if (p4d_large(*p4d) || !p4d_present(*p4d))
     593             :                 return (pte_t *)p4d;
     594             : 
     595        6226 :         pud = pud_offset(p4d, address);
     596        6226 :         if (pud_none(*pud))
     597             :                 return NULL;
     598             : 
     599        6226 :         *level = PG_LEVEL_1G;
     600       12452 :         if (pud_large(*pud) || !pud_present(*pud))
     601             :                 return (pte_t *)pud;
     602             : 
     603        6226 :         pmd = pmd_offset(pud, address);
     604        6226 :         if (pmd_none(*pmd))
     605             :                 return NULL;
     606             : 
     607        6226 :         *level = PG_LEVEL_2M;
     608       12119 :         if (pmd_large(*pmd) || !pmd_present(*pmd))
     609             :                 return (pte_t *)pmd;
     610             : 
     611        5893 :         *level = PG_LEVEL_4K;
     612             : 
     613       11786 :         return pte_offset_kernel(pmd, address);
     614             : }
     615             : 
     616             : /*
     617             :  * Lookup the page table entry for a virtual address. Return a pointer
     618             :  * to the entry and the level of the mapping.
     619             :  *
     620             :  * Note: We return pud and pmd either when the entry is marked large
     621             :  * or when the present bit is not set. Otherwise we would return a
      622             :  * pointer to a nonexistent mapping.
     623             :  */
     624        6226 : pte_t *lookup_address(unsigned long address, unsigned int *level)
     625             : {
     626        6226 :         return lookup_address_in_pgd(pgd_offset_k(address), address, level);
     627             : }
     628             : EXPORT_SYMBOL_GPL(lookup_address);
     629             : 
     630             : /*
     631             :  * Lookup the page table entry for a virtual address in a given mm. Return a
     632             :  * pointer to the entry and the level of the mapping.
     633             :  */
     634           0 : pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
     635             :                             unsigned int *level)
     636             : {
     637           0 :         return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
     638             : }
     639             : EXPORT_SYMBOL_GPL(lookup_address_in_mm);
     640             : 
     641        4554 : static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
     642             :                                   unsigned int *level)
     643             : {
     644        4554 :         if (cpa->pgd)
     645           0 :                 return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
     646             :                                                address, level);
     647             : 
     648        4554 :         return lookup_address(address, level);
     649             : }
     650             : 
     651             : /*
     652             :  * Lookup the PMD entry for a virtual address. Return a pointer to the entry
     653             :  * or NULL if not present.
     654             :  */
     655           0 : pmd_t *lookup_pmd_address(unsigned long address)
     656             : {
     657           0 :         pgd_t *pgd;
     658           0 :         p4d_t *p4d;
     659           0 :         pud_t *pud;
     660             : 
     661           0 :         pgd = pgd_offset_k(address);
     662           0 :         if (pgd_none(*pgd))
     663             :                 return NULL;
     664             : 
     665           0 :         p4d = p4d_offset(pgd, address);
     666           0 :         if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
     667           0 :                 return NULL;
     668             : 
     669           0 :         pud = pud_offset(p4d, address);
     670           0 :         if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
     671             :                 return NULL;
     672             : 
     673           0 :         return pmd_offset(pud, address);
     674             : }
     675             : 
     676             : /*
     677             :  * This is necessary because __pa() does not work on some
     678             :  * kinds of memory, like vmalloc() or the alloc_remap()
     679             :  * areas on 32-bit NUMA systems.  The percpu areas can
     680             :  * end up in this kind of memory, for instance.
     681             :  *
     682             :  * This could be optimized, but it is only intended to be
      683             :  * used at initialization time, and keeping it
     684             :  * unoptimized should increase the testing coverage for
     685             :  * the more obscure platforms.
     686             :  */
     687          17 : phys_addr_t slow_virt_to_phys(void *__virt_addr)
     688             : {
     689          17 :         unsigned long virt_addr = (unsigned long)__virt_addr;
     690          17 :         phys_addr_t phys_addr;
     691          17 :         unsigned long offset;
     692          17 :         enum pg_level level;
     693          17 :         pte_t *pte;
     694             : 
     695          17 :         pte = lookup_address(virt_addr, &level);
     696          17 :         BUG_ON(!pte);
     697             : 
     698             :         /*
     699             :          * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
     700             :          * before being left-shifted PAGE_SHIFT bits -- this trick is to
      701             :          * make the 32-bit PAE kernel work correctly.
     702             :          */
     703          17 :         switch (level) {
     704           0 :         case PG_LEVEL_1G:
     705           0 :                 phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
     706           0 :                 offset = virt_addr & ~PUD_PAGE_MASK;
     707           0 :                 break;
     708          13 :         case PG_LEVEL_2M:
     709          13 :                 phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
     710          13 :                 offset = virt_addr & ~PMD_PAGE_MASK;
     711          13 :                 break;
     712           4 :         default:
     713           4 :                 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
     714           4 :                 offset = virt_addr & ~PAGE_MASK;
     715             :         }
     716             : 
     717          17 :         return (phys_addr_t)(phys_addr | offset);
     718             : }
     719             : EXPORT_SYMBOL_GPL(slow_virt_to_phys);
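
The level-dependent masks make the arithmetic concrete: for a 2M mapping,
the physical address is the pfn from the PMD shifted up by PAGE_SHIFT, plus
the low 21 bits of the virtual address. A worked example with hypothetical
values:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PMD_SHIFT  21 /* 2 MiB pages on x86-64 */

    int main(void)
    {
            /* Hypothetical: a PMD mapping the 2 MiB page at physical
             * 0x40000000, and a virtual address 0x1234 bytes into it. */
            uint64_t pmd_pfn = 0x40000000ULL >> PAGE_SHIFT;
            uint64_t virt    = 0xffff888040001234ULL;

            uint64_t phys = (pmd_pfn << PAGE_SHIFT) |
                            (virt & ((1ULL << PMD_SHIFT) - 1));

            printf("%#llx\n", (unsigned long long)phys); /* 0x40001234 */
            return 0;
    }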
     720             : 
     721             : /*
     722             :  * Set the new pmd in all the pgds we know about:
     723             :  */
     724          64 : static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
     725             : {
     726             :         /* change init_mm */
     727          64 :         set_pte_atomic(kpte, pte);
     728             : #ifdef CONFIG_X86_32
     729             :         if (!SHARED_KERNEL_PMD) {
     730             :                 struct page *page;
     731             : 
     732             :                 list_for_each_entry(page, &pgd_list, lru) {
     733             :                         pgd_t *pgd;
     734             :                         p4d_t *p4d;
     735             :                         pud_t *pud;
     736             :                         pmd_t *pmd;
     737             : 
     738             :                         pgd = (pgd_t *)page_address(page) + pgd_index(address);
     739             :                         p4d = p4d_offset(pgd, address);
     740             :                         pud = pud_offset(p4d, address);
     741             :                         pmd = pmd_offset(pud, address);
     742             :                         set_pte_atomic((pte_t *)pmd, pte);
     743             :                 }
     744             :         }
     745             : #endif
     746             : }
     747             : 
     748        4401 : static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
     749             : {
     750             :         /*
     751             :          * _PAGE_GLOBAL means "global page" for present PTEs.
     752             :          * But, it is also used to indicate _PAGE_PROTNONE
     753             :          * for non-present PTEs.
     754             :          *
     755             :          * This ensures that a _PAGE_GLOBAL PTE going from
     756             :          * present to non-present is not confused as
     757             :          * _PAGE_PROTNONE.
     758             :          */
     759        4401 :         if (!(pgprot_val(prot) & _PAGE_PRESENT))
     760           0 :                 pgprot_val(prot) &= ~_PAGE_GLOBAL;
     761             : 
     762        4401 :         return prot;
     763             : }
     764             : 
     765         153 : static int __should_split_large_page(pte_t *kpte, unsigned long address,
     766             :                                      struct cpa_data *cpa)
     767             : {
     768         153 :         unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
     769         153 :         pgprot_t old_prot, new_prot, req_prot, chk_prot;
     770         153 :         pte_t new_pte, *tmp;
     771         153 :         enum pg_level level;
     772             : 
     773             :         /*
     774             :          * Check for races, another CPU might have split this page
     775             :          * up already:
     776             :          */
     777         153 :         tmp = _lookup_address_cpa(cpa, address, &level);
     778         153 :         if (tmp != kpte)
     779             :                 return 1;
     780             : 
     781         153 :         switch (level) {
     782         153 :         case PG_LEVEL_2M:
     783         153 :                 old_prot = pmd_pgprot(*(pmd_t *)kpte);
     784         153 :                 old_pfn = pmd_pfn(*(pmd_t *)kpte);
     785         153 :                 cpa_inc_2m_checked();
     786             :                 break;
     787           0 :         case PG_LEVEL_1G:
     788           0 :                 old_prot = pud_pgprot(*(pud_t *)kpte);
     789           0 :                 old_pfn = pud_pfn(*(pud_t *)kpte);
     790           0 :                 cpa_inc_1g_checked();
     791             :                 break;
     792             :         default:
     793             :                 return -EINVAL;
     794             :         }
     795             : 
     796         153 :         psize = page_level_size(level);
     797         153 :         pmask = page_level_mask(level);
     798             : 
     799             :         /*
     800             :          * Calculate the number of pages, which fit into this large
     801             :          * page starting at address:
     802             :          */
     803         153 :         lpaddr = (address + psize) & pmask;
     804         153 :         numpages = (lpaddr - address) >> PAGE_SHIFT;
     805         153 :         if (numpages < cpa->numpages)
     806          36 :                 cpa->numpages = numpages;
     807             : 
     808             :         /*
     809             :          * We are safe now. Check whether the new pgprot is the same:
     810             :          * Convert protection attributes to 4k-format, as cpa->mask* are set
     811             :          * up accordingly.
     812             :          */
     813             : 
     814             :         /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
     815         153 :         req_prot = pgprot_large_2_4k(old_prot);
     816             : 
     817         153 :         pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
     818         153 :         pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
     819             : 
     820             :         /*
     821             :          * req_prot is in format of 4k pages. It must be converted to large
     822             :          * page format: the caching mode includes the PAT bit located at
     823             :          * different bit positions in the two formats.
     824             :          */
     825         153 :         req_prot = pgprot_4k_2_large(req_prot);
     826         153 :         req_prot = pgprot_clear_protnone_bits(req_prot);
     827         153 :         if (pgprot_val(req_prot) & _PAGE_PRESENT)
     828         153 :                 pgprot_val(req_prot) |= _PAGE_PSE;
     829             : 
     830             :         /*
     831             :          * old_pfn points to the large page base pfn. So we need to add the
     832             :          * offset of the virtual address:
     833             :          */
     834         153 :         pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
     835         153 :         cpa->pfn = pfn;
     836             : 
     837             :         /*
     838             :          * Calculate the large page base address and the number of 4K pages
     839             :          * in the large page
     840             :          */
     841         153 :         lpaddr = address & pmask;
     842         153 :         numpages = psize >> PAGE_SHIFT;
     843             : 
     844             :         /*
     845             :          * Sanity check that the existing mapping is correct versus the static
     846             :          * protections. static_protections() guards against !PRESENT, so no
     847             :          * extra conditional required here.
     848             :          */
     849         153 :         chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
     850             :                                       psize, CPA_CONFLICT);
     851             : 
     852         153 :         if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
     853             :                 /*
     854             :                  * Split the large page and tell the split code to
     855             :                  * enforce static protections.
     856             :                  */
     857           0 :                 cpa->force_static_prot = 1;
     858           0 :                 return 1;
     859             :         }
     860             : 
     861             :         /*
     862             :          * Optimization: If the requested pgprot is the same as the current
     863             :          * pgprot, then the large page can be preserved and no updates are
     864             :          * required independent of alignment and length of the requested
     865             :          * range. The above already established that the current pgprot is
     866             :          * correct, which in consequence makes the requested pgprot correct
     867             :          * as well if it is the same. The static protection scan below will
     868             :          * not come to a different conclusion.
     869             :          */
     870         153 :         if (pgprot_val(req_prot) == pgprot_val(old_prot)) {
     871         153 :                 cpa_inc_lp_sameprot(level);
     872             :                 return 0;
     873             :         }
     874             : 
     875             :         /*
     876             :          * If the requested range does not cover the full page, split it up
     877             :          */
     878          64 :         if (address != lpaddr || cpa->numpages != numpages)
     879             :                 return 1;
     880             : 
     881             :         /*
     882             :          * Check whether the requested pgprot is conflicting with a static
     883             :          * protection requirement in the large page.
     884             :          */
     885          50 :         new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
     886             :                                       psize, CPA_DETECT);
     887             : 
     888             :         /*
     889             :          * If there is a conflict, split the large page.
     890             :          *
     891             :          * There used to be a 4k wise evaluation trying really hard to
      892             :          * preserve the large pages, but experimentation has shown that this
     893             :          * does not help at all. There might be corner cases which would
     894             :          * preserve one large page occasionally, but it's really not worth the
     895             :          * extra code and cycles for the common case.
     896             :          */
     897          50 :         if (pgprot_val(req_prot) != pgprot_val(new_prot))
     898             :                 return 1;
     899             : 
     900             :         /* All checks passed. Update the large page mapping. */
     901          50 :         new_pte = pfn_pte(old_pfn, new_prot);
     902          50 :         __set_pmd_pte(kpte, address, new_pte);
     903          50 :         cpa->flags |= CPA_FLUSHTLB;
     904          50 :         cpa_inc_lp_preserved(level);
     905          50 :         return 0;
     906             : }
     907             : 
     908         153 : static int should_split_large_page(pte_t *kpte, unsigned long address,
     909             :                                    struct cpa_data *cpa)
     910             : {
     911         153 :         int do_split;
     912             : 
     913         153 :         if (cpa->force_split)
     914             :                 return 1;
     915             : 
     916         153 :         spin_lock(&pgd_lock);
     917         153 :         do_split = __should_split_large_page(kpte, address, cpa);
     918         153 :         spin_unlock(&pgd_lock);
     919             : 
     920         153 :         return do_split;
     921             : }
     922             : 
     923        7168 : static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
     924             :                           pgprot_t ref_prot, unsigned long address,
     925             :                           unsigned long size)
     926             : {
     927        7168 :         unsigned int npg = PFN_DOWN(size);
     928        7168 :         pgprot_t prot;
     929             : 
     930             :         /*
     931             :          * If should_split_large_page() discovered an inconsistent mapping,
     932             :          * remove the invalid protection in the split mapping.
     933             :          */
     934        7168 :         if (!cpa->force_static_prot)
     935        7168 :                 goto set;
     936             : 
     937             :         /* Hand in lpsize = 0 to enforce the protection mechanism */
     938           0 :         prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
     939             : 
     940           0 :         if (pgprot_val(prot) == pgprot_val(ref_prot))
     941           0 :                 goto set;
     942             : 
     943             :         /*
     944             :          * If this is splitting a PMD, fix it up. PUD splits cannot be
      945             :          * fixed trivially as that would require rescanning the newly
     946             :          * installed PMD mappings after returning from split_large_page()
     947             :          * so an eventual further split can allocate the necessary PTE
     948             :          * pages. Warn for now and revisit it in case this actually
     949             :          * happens.
     950             :          */
     951           0 :         if (size == PAGE_SIZE)
     952             :                 ref_prot = prot;
     953             :         else
     954           0 :                 pr_warn_once("CPA: Cannot fixup static protections for PUD split\n");
     955        7168 : set:
     956        7168 :         set_pte(pte, pfn_pte(pfn, ref_prot));
     957        7168 : }
     958             : 
     959             : static int
     960          14 : __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
     961             :                    struct page *base)
     962             : {
     963          14 :         unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
     964          14 :         pte_t *pbase = (pte_t *)page_address(base);
     965          14 :         unsigned int i, level;
     966          14 :         pgprot_t ref_prot;
     967          14 :         pte_t *tmp;
     968             : 
     969          14 :         spin_lock(&pgd_lock);
     970             :         /*
     971             :          * Check for races, another CPU might have split this page
     972             :          * up for us already:
     973             :          */
     974          14 :         tmp = _lookup_address_cpa(cpa, address, &level);
     975          14 :         if (tmp != kpte) {
     976           0 :                 spin_unlock(&pgd_lock);
     977           0 :                 return 1;
     978             :         }
     979             : 
     980          14 :         paravirt_alloc_pte(&init_mm, page_to_pfn(base));
     981             : 
     982          14 :         switch (level) {
     983          14 :         case PG_LEVEL_2M:
     984          14 :                 ref_prot = pmd_pgprot(*(pmd_t *)kpte);
     985             :                 /*
     986             :                  * Clear PSE (aka _PAGE_PAT) and move
     987             :                  * PAT bit to correct position.
     988             :                  */
     989          14 :                 ref_prot = pgprot_large_2_4k(ref_prot);
     990          14 :                 ref_pfn = pmd_pfn(*(pmd_t *)kpte);
     991          14 :                 lpaddr = address & PMD_MASK;
     992          14 :                 lpinc = PAGE_SIZE;
     993          14 :                 break;
     994             : 
     995           0 :         case PG_LEVEL_1G:
     996           0 :                 ref_prot = pud_pgprot(*(pud_t *)kpte);
     997           0 :                 ref_pfn = pud_pfn(*(pud_t *)kpte);
     998           0 :                 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
     999           0 :                 lpaddr = address & PUD_MASK;
    1000           0 :                 lpinc = PMD_SIZE;
    1001             :                 /*
     1002             :          * Clear the PSE flag if the PRESENT flag is not set,
     1003             :          * otherwise pmd_present/pmd_huge will return true
     1004             :          * even on a non-present pmd.
    1005             :                  */
    1006           0 :                 if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
    1007           0 :                         pgprot_val(ref_prot) &= ~_PAGE_PSE;
    1008             :                 break;
    1009             : 
    1010             :         default:
    1011           0 :                 spin_unlock(&pgd_lock);
    1012           0 :                 return 1;
    1013             :         }
    1014             : 
    1015          14 :         ref_prot = pgprot_clear_protnone_bits(ref_prot);
    1016             : 
    1017             :         /*
    1018             :          * Get the target pfn from the original entry:
    1019             :          */
    1020          14 :         pfn = ref_pfn;
    1021        7182 :         for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc)
    1022        7168 :                 split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc);
    1023             : 
    1024          14 :         if (virt_addr_valid(address)) {
    1025          14 :                 unsigned long pfn = PFN_DOWN(__pa(address));
    1026             : 
    1027          14 :                 if (pfn_range_is_mapped(pfn, pfn + 1))
    1028          14 :                         split_page_count(level);
    1029             :         }
    1030             : 
    1031             :         /*
    1032             :          * Install the new, split up pagetable.
    1033             :          *
    1034             :          * We use the standard kernel pagetable protections for the new
     1035             :          * pagetable protections; the actual PTEs set above control the
    1036             :          * primary protection behavior:
    1037             :          */
    1038          14 :         __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
    1039             : 
    1040             :         /*
     1041             :          * Do a global TLB flush after splitting the large page
    1042             :          * and before we do the actual change page attribute in the PTE.
    1043             :          *
    1044             :          * Without this, we violate the TLB application note, that says:
    1045             :          * "The TLBs may contain both ordinary and large-page
    1046             :          *  translations for a 4-KByte range of linear addresses. This
    1047             :          *  may occur if software modifies the paging structures so that
    1048             :          *  the page size used for the address range changes. If the two
    1049             :          *  translations differ with respect to page frame or attributes
    1050             :          *  (e.g., permissions), processor behavior is undefined and may
    1051             :          *  be implementation-specific."
    1052             :          *
     1053             :          * We do this global TLB flush inside the cpa_lock, so that
     1054             :          * no other CPU with stale TLB entries can change, in parallel,
     1055             :          * the attributes of a page that falls into the just-split
     1056             :          * large page entry.
    1057             :          */
    1058          14 :         flush_tlb_all();
    1059          14 :         spin_unlock(&pgd_lock);
    1060             : 
    1061          14 :         return 0;
    1062             : }
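
The split loop above walks all PTRS_PER_PTE entries (512 on x86-64) of the new
page-table page, advancing the target pfn by pfninc and the linear address by
lpinc at each step; for a 2M split, pfninc is 1 (initialized earlier in the
function, above this excerpt) and lpinc is PAGE_SIZE. Below is a minimal
userspace model of just that arithmetic; the MODEL_* constants are stand-ins
for the kernel's, not the kernel implementation:

#include <stdio.h>

#define MODEL_PAGE_SHIFT   12
#define MODEL_PAGE_SIZE    (1UL << MODEL_PAGE_SHIFT)
#define MODEL_PTRS_PER_PTE 512
#define MODEL_PMD_MASK     (~(MODEL_PAGE_SIZE * MODEL_PTRS_PER_PTE - 1))

int main(void)
{
        unsigned long address = 0xffff888001234567UL;
        unsigned long ref_pfn = 0x1000;      /* pfn of the 2M head page */
        unsigned long pfn     = ref_pfn;
        unsigned long pfninc  = 1;           /* one 4K pfn per PTE after a 2M split */
        unsigned long lpaddr  = address & MODEL_PMD_MASK;
        unsigned long lpinc   = MODEL_PAGE_SIZE;
        int i;

        for (i = 0; i < MODEL_PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc) {
                /* print only the first two and the last entry */
                if (i < 2 || i == MODEL_PTRS_PER_PTE - 1)
                        printf("pte[%3d]: pfn=%#lx vaddr=%#lx\n", i, pfn, lpaddr);
        }
        return 0;
}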
    1063             : 
    1064          14 : static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
    1065             :                             unsigned long address)
    1066             : {
    1067          14 :         struct page *base;
    1068             : 
    1069          14 :         if (!debug_pagealloc_enabled())
    1070          14 :                 spin_unlock(&cpa_lock);
    1071          14 :         base = alloc_pages(GFP_KERNEL, 0);
    1072          14 :         if (!debug_pagealloc_enabled())
    1073          14 :                 spin_lock(&cpa_lock);
    1074          14 :         if (!base)
    1075             :                 return -ENOMEM;
    1076             : 
    1077          14 :         if (__split_large_page(cpa, kpte, address, base))
    1078           0 :                 __free_page(base);
    1079             : 
    1080             :         return 0;
    1081             : }
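
split_large_page() drops cpa_lock around alloc_pages() because a GFP_KERNEL
allocation may sleep, and sleeping while holding a spinlock is forbidden; the
"tmp != kpte" re-check near the top of __split_large_page() exists precisely
because the mapping may change while the lock is dropped. A hedged pthreads
sketch of the same unlock/block/relock/re-validate shape (illustrative names,
not kernel API):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Drop the lock around a call that may block, then re-take it.
 * Any state observed under the old critical section must be
 * re-validated afterwards, since it may have changed meanwhile. */
static void *alloc_outside_lock(size_t size)
{
        void *p;

        pthread_mutex_unlock(&lock);
        p = malloc(size);               /* may block; must not hold the lock */
        pthread_mutex_lock(&lock);
        return p;                       /* caller re-checks shared state */
}

int main(void)
{
        pthread_mutex_lock(&lock);
        void *p = alloc_outside_lock(64);
        pthread_mutex_unlock(&lock);
        free(p);
        return 0;
}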
    1082             : 
    1083           0 : static bool try_to_free_pte_page(pte_t *pte)
    1084             : {
    1085           0 :         int i;
    1086             : 
    1087           0 :         for (i = 0; i < PTRS_PER_PTE; i++)
    1088           0 :                 if (!pte_none(pte[i]))
    1089             :                         return false;
    1090             : 
    1091           0 :         free_page((unsigned long)pte);
    1092           0 :         return true;
    1093             : }
    1094             : 
    1095           0 : static bool try_to_free_pmd_page(pmd_t *pmd)
    1096             : {
    1097           0 :         int i;
    1098             : 
    1099           0 :         for (i = 0; i < PTRS_PER_PMD; i++)
    1100           0 :                 if (!pmd_none(pmd[i]))
    1101             :                         return false;
    1102             : 
    1103           0 :         free_page((unsigned long)pmd);
    1104           0 :         return true;
    1105             : }
    1106             : 
    1107           0 : static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
    1108             : {
    1109           0 :         pte_t *pte = pte_offset_kernel(pmd, start);
    1110             : 
    1111           0 :         while (start < end) {
    1112           0 :                 set_pte(pte, __pte(0));
    1113             : 
    1114           0 :                 start += PAGE_SIZE;
    1115           0 :                 pte++;
    1116             :         }
    1117             : 
    1118           0 :         if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
    1119           0 :                 pmd_clear(pmd);
    1120           0 :                 return true;
    1121             :         }
    1122             :         return false;
    1123             : }
    1124             : 
    1125           0 : static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
    1126             :                               unsigned long start, unsigned long end)
    1127             : {
    1128           0 :         if (unmap_pte_range(pmd, start, end))
    1129           0 :                 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
    1130           0 :                         pud_clear(pud);
    1131           0 : }
    1132             : 
    1133           0 : static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
    1134             : {
    1135           0 :         pmd_t *pmd = pmd_offset(pud, start);
    1136             : 
    1137             :         /*
    1138             :          * Not on a 2MB page boundary?
    1139             :          */
    1140           0 :         if (start & (PMD_SIZE - 1)) {
    1141           0 :                 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
    1142           0 :                 unsigned long pre_end = min_t(unsigned long, end, next_page);
    1143             : 
    1144           0 :                 __unmap_pmd_range(pud, pmd, start, pre_end);
    1145             : 
    1146           0 :                 start = pre_end;
    1147           0 :                 pmd++;
    1148             :         }
    1149             : 
    1150             :         /*
    1151             :          * Try to unmap in 2M chunks.
    1152             :          */
    1153           0 :         while (end - start >= PMD_SIZE) {
    1154           0 :                 if (pmd_large(*pmd))
    1155           0 :                         pmd_clear(pmd);
    1156             :                 else
    1157           0 :                         __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
    1158             : 
    1159           0 :                 start += PMD_SIZE;
    1160           0 :                 pmd++;
    1161             :         }
    1162             : 
    1163             :         /*
    1164             :          * 4K leftovers?
    1165             :          */
    1166           0 :         if (start < end)
    1167           0 :                 return __unmap_pmd_range(pud, pmd, start, end);
    1168             : 
    1169             :         /*
     1170             :          * Try again to free the PMD page if we haven't succeeded above.
    1171             :          */
    1172           0 :         if (!pud_none(*pud))
    1173           0 :                 if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
    1174           0 :                         pud_clear(pud);
    1175             : }
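
unmap_pmd_range() is the classic head/bulk/tail range decomposition: peel off
the unaligned head up to the next 2M boundary, process whole 2M chunks, then
hand the 4K-granular tail back to the PTE level. A standalone C model of the
decomposition (CHUNK stands in for PMD_SIZE; the addresses are made up):

#include <stdio.h>

#define CHUNK (2UL << 20)               /* models PMD_SIZE (2 MiB) */

static void decompose(unsigned long start, unsigned long end)
{
        if (start & (CHUNK - 1)) {      /* unaligned head */
                unsigned long next = (start + CHUNK) & ~(CHUNK - 1);
                unsigned long pre_end = next < end ? next : end;
                printf("head: %#lx-%#lx\n", start, pre_end);
                start = pre_end;
        }
        while (end - start >= CHUNK) {  /* whole 2M chunks */
                printf("bulk: %#lx-%#lx\n", start, start + CHUNK);
                start += CHUNK;
        }
        if (start < end)                /* 4K-granular tail */
                printf("tail: %#lx-%#lx\n", start, end);
}

int main(void)
{
        decompose(0x1ff000, 0x600000);  /* straddles two 2M boundaries */
        return 0;
}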
    1176             : 
    1177           0 : static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
    1178             : {
    1179           0 :         pud_t *pud = pud_offset(p4d, start);
    1180             : 
    1181             :         /*
    1182             :          * Not on a GB page boundary?
    1183             :          */
    1184           0 :         if (start & (PUD_SIZE - 1)) {
    1185           0 :                 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
    1186           0 :                 unsigned long pre_end   = min_t(unsigned long, end, next_page);
    1187             : 
    1188           0 :                 unmap_pmd_range(pud, start, pre_end);
    1189             : 
    1190           0 :                 start = pre_end;
    1191           0 :                 pud++;
    1192             :         }
    1193             : 
    1194             :         /*
     1195             :          * Try to unmap in 1G chunks.
    1196             :          */
    1197           0 :         while (end - start >= PUD_SIZE) {
    1198             : 
    1199           0 :                 if (pud_large(*pud))
    1200           0 :                         pud_clear(pud);
    1201             :                 else
    1202           0 :                         unmap_pmd_range(pud, start, start + PUD_SIZE);
    1203             : 
    1204           0 :                 start += PUD_SIZE;
    1205           0 :                 pud++;
    1206             :         }
    1207             : 
    1208             :         /*
    1209             :          * 2M leftovers?
    1210             :          */
    1211           0 :         if (start < end)
    1212           0 :                 unmap_pmd_range(pud, start, end);
    1213             : 
    1214             :         /*
    1215             :          * No need to try to free the PUD page because we'll free it in
    1216             :          * populate_pgd's error path
    1217             :          */
    1218           0 : }
    1219             : 
    1220           0 : static int alloc_pte_page(pmd_t *pmd)
    1221             : {
    1222           0 :         pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
    1223           0 :         if (!pte)
    1224             :                 return -1;
    1225             : 
    1226           0 :         set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
    1227           0 :         return 0;
    1228             : }
    1229             : 
    1230           0 : static int alloc_pmd_page(pud_t *pud)
    1231             : {
    1232           0 :         pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
    1233           0 :         if (!pmd)
    1234             :                 return -1;
    1235             : 
    1236           0 :         set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
    1237           0 :         return 0;
    1238             : }
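
Both allocators link a freshly zeroed page into the hierarchy by writing its
physical address, OR-ed with the _KERNPG_TABLE permission bits, into the
parent entry. A small model of composing such an entry; the MODEL_* bit
positions mirror x86 conventions but are illustrative stand-ins, not the
kernel's definitions:

#include <stdio.h>
#include <stdint.h>

/* Illustrative permission bits for a kernel page-table entry
 * (stand-ins; the real _KERNPG_TABLE is architecture-defined). */
#define MODEL_PRESENT  (1UL << 0)
#define MODEL_RW       (1UL << 1)
#define MODEL_ACCESSED (1UL << 5)
#define MODEL_DIRTY    (1UL << 6)
#define MODEL_KERNPG_TABLE \
        (MODEL_PRESENT | MODEL_RW | MODEL_ACCESSED | MODEL_DIRTY)

int main(void)
{
        uint64_t table_phys = 0x12345000;       /* page-aligned child table */
        uint64_t entry = table_phys | MODEL_KERNPG_TABLE;

        printf("entry=%#llx (phys=%#llx, flags=%#llx)\n",
               (unsigned long long)entry,
               (unsigned long long)(entry & ~0xfffULL),
               (unsigned long long)(entry & 0xfffULL));
        return 0;
}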
    1239             : 
    1240           0 : static void populate_pte(struct cpa_data *cpa,
    1241             :                          unsigned long start, unsigned long end,
    1242             :                          unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
    1243             : {
    1244           0 :         pte_t *pte;
    1245             : 
    1246           0 :         pte = pte_offset_kernel(pmd, start);
    1247             : 
    1248           0 :         pgprot = pgprot_clear_protnone_bits(pgprot);
    1249             : 
    1250           0 :         while (num_pages-- && start < end) {
    1251           0 :                 set_pte(pte, pfn_pte(cpa->pfn, pgprot));
    1252             : 
    1253           0 :                 start    += PAGE_SIZE;
    1254           0 :                 cpa->pfn++;
    1255           0 :                 pte++;
    1256             :         }
    1257           0 : }
    1258             : 
    1259           0 : static long populate_pmd(struct cpa_data *cpa,
    1260             :                          unsigned long start, unsigned long end,
    1261             :                          unsigned num_pages, pud_t *pud, pgprot_t pgprot)
    1262             : {
    1263           0 :         long cur_pages = 0;
    1264           0 :         pmd_t *pmd;
    1265           0 :         pgprot_t pmd_pgprot;
    1266             : 
    1267             :         /*
    1268             :          * Not on a 2M boundary?
    1269             :          */
    1270           0 :         if (start & (PMD_SIZE - 1)) {
    1271           0 :                 unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
    1272           0 :                 unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
    1273             : 
    1274           0 :                 pre_end   = min_t(unsigned long, pre_end, next_page);
    1275           0 :                 cur_pages = (pre_end - start) >> PAGE_SHIFT;
    1276           0 :                 cur_pages = min_t(unsigned int, num_pages, cur_pages);
    1277             : 
    1278             :                 /*
    1279             :                  * Need a PTE page?
    1280             :                  */
    1281           0 :                 pmd = pmd_offset(pud, start);
    1282           0 :                 if (pmd_none(*pmd))
    1283           0 :                         if (alloc_pte_page(pmd))
    1284             :                                 return -1;
    1285             : 
    1286           0 :                 populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
    1287             : 
    1288           0 :                 start = pre_end;
    1289             :         }
    1290             : 
    1291             :         /*
    1292             :          * We mapped them all?
    1293             :          */
    1294           0 :         if (num_pages == cur_pages)
    1295             :                 return cur_pages;
    1296             : 
    1297           0 :         pmd_pgprot = pgprot_4k_2_large(pgprot);
    1298             : 
    1299           0 :         while (end - start >= PMD_SIZE) {
    1300             : 
    1301             :                 /*
     1302             :                  * We cannot use a 1G page, so allocate a PMD page if needed.
    1303             :                  */
    1304           0 :                 if (pud_none(*pud))
    1305           0 :                         if (alloc_pmd_page(pud))
    1306           0 :                                 return -1;
    1307             : 
    1308           0 :                 pmd = pmd_offset(pud, start);
    1309             : 
    1310           0 :                 set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
    1311             :                                         canon_pgprot(pmd_pgprot))));
    1312             : 
    1313           0 :                 start     += PMD_SIZE;
    1314           0 :                 cpa->pfn  += PMD_SIZE >> PAGE_SHIFT;
    1315           0 :                 cur_pages += PMD_SIZE >> PAGE_SHIFT;
    1316             :         }
    1317             : 
    1318             :         /*
    1319             :          * Map trailing 4K pages.
    1320             :          */
    1321           0 :         if (start < end) {
    1322           0 :                 pmd = pmd_offset(pud, start);
    1323           0 :                 if (pmd_none(*pmd))
    1324           0 :                         if (alloc_pte_page(pmd))
    1325             :                                 return -1;
    1326             : 
    1327           0 :                 populate_pte(cpa, start, end, num_pages - cur_pages,
    1328             :                              pmd, pgprot);
    1329             :         }
    1330             :         return num_pages;
    1331             : }
    1332             : 
    1333           0 : static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
    1334             :                         pgprot_t pgprot)
    1335             : {
    1336           0 :         pud_t *pud;
    1337           0 :         unsigned long end;
    1338           0 :         long cur_pages = 0;
    1339           0 :         pgprot_t pud_pgprot;
    1340             : 
    1341           0 :         end = start + (cpa->numpages << PAGE_SHIFT);
    1342             : 
    1343             :         /*
     1344             :          * Not on a GB page boundary? => map everything up to it with
    1345             :          * smaller pages.
    1346             :          */
    1347           0 :         if (start & (PUD_SIZE - 1)) {
    1348           0 :                 unsigned long pre_end;
    1349           0 :                 unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
    1350             : 
    1351           0 :                 pre_end   = min_t(unsigned long, end, next_page);
    1352           0 :                 cur_pages = (pre_end - start) >> PAGE_SHIFT;
    1353           0 :                 cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
    1354             : 
    1355           0 :                 pud = pud_offset(p4d, start);
    1356             : 
    1357             :                 /*
    1358             :                  * Need a PMD page?
    1359             :                  */
    1360           0 :                 if (pud_none(*pud))
    1361           0 :                         if (alloc_pmd_page(pud))
    1362             :                                 return -1;
    1363             : 
    1364           0 :                 cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
    1365             :                                          pud, pgprot);
    1366           0 :                 if (cur_pages < 0)
    1367           0 :                         return cur_pages;
    1368             : 
    1369             :                 start = pre_end;
    1370             :         }
    1371             : 
    1372             :         /* We mapped them all? */
    1373           0 :         if (cpa->numpages == cur_pages)
    1374           0 :                 return cur_pages;
    1375             : 
    1376           0 :         pud = pud_offset(p4d, start);
    1377           0 :         pud_pgprot = pgprot_4k_2_large(pgprot);
    1378             : 
    1379             :         /*
     1380             :          * Map everything starting from the GB boundary, possibly with 1G pages.
    1381             :          */
    1382           0 :         while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
    1383           0 :                 set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
    1384             :                                    canon_pgprot(pud_pgprot))));
    1385             : 
    1386           0 :                 start     += PUD_SIZE;
    1387           0 :                 cpa->pfn  += PUD_SIZE >> PAGE_SHIFT;
    1388           0 :                 cur_pages += PUD_SIZE >> PAGE_SHIFT;
    1389           0 :                 pud++;
    1390             :         }
    1391             : 
    1392             :         /* Map trailing leftover */
    1393           0 :         if (start < end) {
    1394           0 :                 long tmp;
    1395             : 
    1396           0 :                 pud = pud_offset(p4d, start);
    1397           0 :                 if (pud_none(*pud))
    1398           0 :                         if (alloc_pmd_page(pud))
    1399             :                                 return -1;
    1400             : 
    1401           0 :                 tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
    1402             :                                    pud, pgprot);
    1403           0 :                 if (tmp < 0)
    1404           0 :                         return cur_pages;
    1405             : 
    1406           0 :                 cur_pages += tmp;
    1407             :         }
    1408           0 :         return cur_pages;
    1409             : }
    1410             : 
    1411             : /*
     1412             :  * Restrictions for the kernel page table do not necessarily apply when
     1413             :  * mapping in an alternate PGD.
    1414             :  */
    1415           0 : static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
    1416             : {
    1417           0 :         pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
    1418           0 :         pud_t *pud = NULL;      /* shut up gcc */
    1419           0 :         p4d_t *p4d;
    1420           0 :         pgd_t *pgd_entry;
    1421           0 :         long ret;
    1422             : 
    1423           0 :         pgd_entry = cpa->pgd + pgd_index(addr);
    1424             : 
    1425           0 :         if (pgd_none(*pgd_entry)) {
    1426             :                 p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
    1427             :                 if (!p4d)
    1428             :                         return -1;
    1429             : 
    1430             :                 set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
    1431             :         }
    1432             : 
    1433             :         /*
    1434             :          * Allocate a PUD page and hand it down for mapping.
    1435             :          */
    1436           0 :         p4d = p4d_offset(pgd_entry, addr);
    1437           0 :         if (p4d_none(*p4d)) {
    1438           0 :                 pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
    1439           0 :                 if (!pud)
    1440             :                         return -1;
    1441             : 
    1442           0 :                 set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
    1443             :         }
    1444             : 
    1445           0 :         pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
    1446           0 :         pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
    1447             : 
    1448           0 :         ret = populate_pud(cpa, addr, p4d, pgprot);
    1449           0 :         if (ret < 0) {
    1450             :                 /*
    1451             :                  * Leave the PUD page in place in case some other CPU or thread
    1452             :                  * already found it, but remove any useless entries we just
    1453             :                  * added to it.
    1454             :                  */
    1455           0 :                 unmap_pud_range(p4d, addr,
    1456           0 :                                 addr + (cpa->numpages << PAGE_SHIFT));
    1457           0 :                 return ret;
    1458             :         }
    1459             : 
    1460           0 :         cpa->numpages = ret;
    1461           0 :         return 0;
    1462             : }
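
populate_pgd() allocates intermediate tables only where they are missing: each
level checks its none() predicate and, if the slot is empty, installs a zeroed
child page before descending. A compact userspace model of that
allocate-on-demand descent (two levels, invented types, not the kernel API):

#include <stdlib.h>

#define SLOTS 512

struct level { void *slot[SLOTS]; };

/* Descend one level, allocating a zeroed child table on demand;
 * this mirrors the pgd_none()/p4d_none()/pud_none() checks that
 * populate_pgd() and its helpers perform before each step. */
static struct level *walk(struct level *parent, unsigned int idx)
{
        if (!parent->slot[idx]) {
                parent->slot[idx] = calloc(1, sizeof(struct level));
                if (!parent->slot[idx])
                        return NULL;    /* caller unwinds, as populate_pgd() does */
        }
        return parent->slot[idx];
}

int main(void)
{
        struct level root = { 0 };
        struct level *p4d = walk(&root, 0);
        struct level *pud = p4d ? walk(p4d, 3) : NULL;

        free(pud);
        free(p4d);
        return pud == NULL;
}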
    1463             : 
    1464           0 : static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
    1465             :                                int primary)
    1466             : {
    1467           0 :         if (cpa->pgd) {
    1468             :                 /*
     1469             :                  * Right now, we only execute this code path when mapping
     1470             :                  * the EFI virtual memory map regions; no other users
     1471             :                  * provide a ->pgd value. This may change in the future.
    1472             :                  */
    1473           0 :                 return populate_pgd(cpa, vaddr);
    1474             :         }
    1475             : 
    1476             :         /*
     1477             :          * Ignore all non-primary paths.
    1478             :          */
    1479           0 :         if (!primary) {
    1480           0 :                 cpa->numpages = 1;
    1481           0 :                 return 0;
    1482             :         }
    1483             : 
    1484             :         /*
    1485             :          * Ignore the NULL PTE for kernel identity mapping, as it is expected
    1486             :          * to have holes.
     1487             :          * Also set numpages to '1', indicating that we processed the CPA
     1488             :          * request for one virtual address page and its pfn. TBD: numpages can
     1489             :          * be set based on the initial value and the level returned by lookup_address().
    1490             :          */
    1491           0 :         if (within(vaddr, PAGE_OFFSET,
    1492           0 :                    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
    1493           0 :                 cpa->numpages = 1;
    1494           0 :                 cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
    1495           0 :                 return 0;
    1496             : 
    1497           0 :         } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
    1498             :                 /* Faults in the highmap are OK, so do not warn: */
    1499             :                 return -EFAULT;
    1500             :         } else {
    1501           0 :                 WARN(1, KERN_WARNING "CPA: called for zero pte. "
    1502             :                         "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
    1503             :                         *cpa->vaddr);
    1504             : 
    1505           0 :                 return -EFAULT;
    1506             :         }
    1507             : }
    1508             : 
    1509        4373 : static int __change_page_attr(struct cpa_data *cpa, int primary)
    1510             : {
    1511        4373 :         unsigned long address;
    1512        4373 :         int do_split, err;
    1513        4373 :         unsigned int level;
    1514        4373 :         pte_t *kpte, old_pte;
    1515             : 
    1516        4373 :         address = __cpa_addr(cpa, cpa->curpage);
    1517        4387 : repeat:
    1518        4387 :         kpte = _lookup_address_cpa(cpa, address, &level);
    1519        4387 :         if (!kpte)
    1520           0 :                 return __cpa_process_fault(cpa, address, primary);
    1521             : 
    1522        4387 :         old_pte = *kpte;
    1523        4387 :         if (pte_none(old_pte))
    1524           0 :                 return __cpa_process_fault(cpa, address, primary);
    1525             : 
    1526        4387 :         if (level == PG_LEVEL_4K) {
    1527        4234 :                 pte_t new_pte;
    1528        4234 :                 pgprot_t new_prot = pte_pgprot(old_pte);
    1529        4234 :                 unsigned long pfn = pte_pfn(old_pte);
    1530             : 
    1531        4234 :                 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
    1532        4234 :                 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
    1533             : 
    1534        4234 :                 cpa_inc_4k_install();
    1535             :                 /* Hand in lpsize = 0 to enforce the protection mechanism */
    1536        4234 :                 new_prot = static_protections(new_prot, address, pfn, 1, 0,
    1537             :                                               CPA_PROTECT);
    1538             : 
    1539        4234 :                 new_prot = pgprot_clear_protnone_bits(new_prot);
    1540             : 
    1541             :                 /*
     1542             :                  * We need to keep the pfn from the existing PTE;
     1543             :                  * after all, we're only going to change its attributes,
     1544             :                  * not the memory it points to.
    1545             :                  */
    1546        4234 :                 new_pte = pfn_pte(pfn, new_prot);
    1547        4234 :                 cpa->pfn = pfn;
    1548             :                 /*
     1549             :                  * Do we really change anything?
    1550             :                  */
    1551        4234 :                 if (pte_val(old_pte) != pte_val(new_pte)) {
    1552        2826 :                         set_pte_atomic(kpte, new_pte);
    1553        2826 :                         cpa->flags |= CPA_FLUSHTLB;
    1554             :                 }
    1555        4234 :                 cpa->numpages = 1;
    1556        4234 :                 return 0;
    1557             :         }
    1558             : 
    1559             :         /*
     1560             :          * Check whether we can keep the large page intact
    1561             :          * and just change the pte:
    1562             :          */
    1563         153 :         do_split = should_split_large_page(kpte, address, cpa);
    1564             :         /*
     1565             :          * When the range fits into the existing large page,
     1566             :          * return. cpa->numpages and the CPA_FLUSHTLB flag have been
     1567             :          * updated in should_split_large_page():
    1568             :          */
    1569         153 :         if (do_split <= 0)
    1570         139 :                 return do_split;
    1571             : 
    1572             :         /*
    1573             :          * We have to split the large page:
    1574             :          */
    1575          14 :         err = split_large_page(cpa, kpte, address);
    1576          14 :         if (!err)
    1577          14 :                 goto repeat;
    1578             : 
    1579             :         return err;
    1580             : }
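
The PG_LEVEL_4K path above computes the new protections purely from the two
masks: bits in mask_clr are removed, then bits in mask_set are added, i.e.
new = (old & ~clr) | set. A tiny model of that transformation with stand-in
bit definitions; the example mimics a set_memory_ro()-style call, which
clears RW and sets nothing:

#include <stdio.h>
#include <stdint.h>

#define MODEL_RW (1UL << 1)             /* stand-in for _PAGE_RW */
#define MODEL_NX (1UL << 63)            /* stand-in for _PAGE_NX */

static uint64_t apply_masks(uint64_t prot, uint64_t set, uint64_t clr)
{
        prot &= ~clr;   /* mask_clr removes attributes */
        prot |= set;    /* mask_set adds attributes */
        return prot;
}

int main(void)
{
        uint64_t prot = MODEL_RW | MODEL_NX;    /* writable, non-executable */

        /* clear RW, set nothing: the page becomes read-only */
        printf("%#llx\n", (unsigned long long)apply_masks(prot, 0, MODEL_RW));
        return 0;
}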
    1581             : 
    1582             : static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
    1583             : 
    1584         988 : static int cpa_process_alias(struct cpa_data *cpa)
    1585             : {
    1586         988 :         struct cpa_data alias_cpa;
    1587         988 :         unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
    1588         988 :         unsigned long vaddr;
    1589         988 :         int ret;
    1590             : 
    1591         988 :         if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
    1592             :                 return 0;
    1593             : 
    1594             :         /*
     1595             :          * No need to redo when the primary call already touched the
     1596             :          * direct mapping:
    1597             :          */
    1598         988 :         vaddr = __cpa_addr(cpa, cpa->curpage);
    1599         988 :         if (!(within(vaddr, PAGE_OFFSET,
    1600         988 :                     PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
    1601             : 
    1602         986 :                 alias_cpa = *cpa;
    1603         986 :                 alias_cpa.vaddr = &laddr;
    1604         986 :                 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
    1605         986 :                 alias_cpa.curpage = 0;
    1606             : 
    1607         986 :                 cpa->force_flush_all = 1;
    1608             : 
    1609         986 :                 ret = __change_page_attr_set_clr(&alias_cpa, 0);
    1610         986 :                 if (ret)
    1611             :                         return ret;
    1612             :         }
    1613             : 
    1614             : #ifdef CONFIG_X86_64
    1615             :         /*
    1616             :          * If the primary call didn't touch the high mapping already
    1617             :          * and the physical address is inside the kernel map, we need
     1618             :          * to touch the high-mapped kernel as well:
    1619             :          */
    1620         988 :         if (!within(vaddr, (unsigned long)_text, _brk_end) &&
    1621           9 :             __cpa_pfn_in_highmap(cpa->pfn)) {
    1622           0 :                 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
    1623           0 :                                                __START_KERNEL_map - phys_base;
    1624           0 :                 alias_cpa = *cpa;
    1625           0 :                 alias_cpa.vaddr = &temp_cpa_vaddr;
    1626           0 :                 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
    1627           0 :                 alias_cpa.curpage = 0;
    1628             : 
    1629           0 :                 cpa->force_flush_all = 1;
    1630             :                 /*
    1631             :                  * The high mapping range is imprecise, so ignore the
    1632             :                  * return value.
    1633             :                  */
    1634           0 :                 __change_page_attr_set_clr(&alias_cpa, 0);
    1635             :         }
    1636             : #endif
    1637             : 
    1638             :         return 0;
    1639             : }
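
For the 64-bit highmap alias, the code derives the alternate virtual address
directly from the pfn: vaddr = (pfn << PAGE_SHIFT) + __START_KERNEL_map -
phys_base. A worked model with illustrative values, assuming an unrelocated
kernel (phys_base == 0) and the x86-64 __START_KERNEL_map of
0xffffffff80000000:

#include <stdio.h>

#define MODEL_PAGE_SHIFT        12
#define MODEL_START_KERNEL_MAP  0xffffffff80000000UL

int main(void)
{
        unsigned long pfn = 0x1234;             /* some kernel-text pfn */
        unsigned long phys_base = 0;            /* assumed: no relocation */
        unsigned long vaddr = (pfn << MODEL_PAGE_SHIFT) +
                              MODEL_START_KERNEL_MAP - phys_base;

        printf("highmap alias of pfn %#lx: %#lx\n", pfn, vaddr);
        return 0;
}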
    1640             : 
    1641        1007 : static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
    1642             : {
    1643        1007 :         unsigned long numpages = cpa->numpages;
    1644        1007 :         unsigned long rempages = numpages;
    1645        1007 :         int ret = 0;
    1646             : 
    1647        5380 :         while (rempages) {
    1648             :                 /*
    1649             :                  * Store the remaining nr of pages for the large page
    1650             :                  * preservation check.
    1651             :                  */
    1652        4373 :                 cpa->numpages = rempages;
     1653             :                 /* for array changes, we can't use large pages */
    1654        4373 :                 if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
    1655           0 :                         cpa->numpages = 1;
    1656             : 
    1657        4373 :                 if (!debug_pagealloc_enabled())
    1658        4373 :                         spin_lock(&cpa_lock);
    1659        4373 :                 ret = __change_page_attr(cpa, checkalias);
    1660        4373 :                 if (!debug_pagealloc_enabled())
    1661        4373 :                         spin_unlock(&cpa_lock);
    1662        4373 :                 if (ret)
    1663           0 :                         goto out;
    1664             : 
    1665        4373 :                 if (checkalias) {
    1666         988 :                         ret = cpa_process_alias(cpa);
    1667         988 :                         if (ret)
    1668           0 :                                 goto out;
    1669             :                 }
    1670             : 
    1671             :                 /*
    1672             :                  * Adjust the number of pages with the result of the
    1673             :                  * CPA operation. Either a large page has been
    1674             :                  * preserved or a single page update happened.
    1675             :                  */
    1676        4373 :                 BUG_ON(cpa->numpages > rempages || !cpa->numpages);
    1677        4373 :                 rempages -= cpa->numpages;
    1678        4373 :                 cpa->curpage += cpa->numpages;
    1679             :         }
    1680             : 
    1681        1007 : out:
    1682             :         /* Restore the original numpages */
    1683        1007 :         cpa->numpages = numpages;
    1684        1007 :         return ret;
    1685             : }
    1686             : 
    1687          21 : static int change_page_attr_set_clr(unsigned long *addr, int numpages,
    1688             :                                     pgprot_t mask_set, pgprot_t mask_clr,
    1689             :                                     int force_split, int in_flag,
    1690             :                                     struct page **pages)
    1691             : {
    1692          21 :         struct cpa_data cpa;
    1693          21 :         int ret, cache, checkalias;
    1694             : 
    1695          21 :         memset(&cpa, 0, sizeof(cpa));
    1696             : 
    1697             :         /*
     1698             :          * Check if we are requested to set an unsupported
     1699             :          * feature. Clearing unsupported features is OK.
    1700             :          */
    1701          21 :         mask_set = canon_pgprot(mask_set);
    1702             : 
    1703          21 :         if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
    1704             :                 return 0;
    1705             : 
    1706             :         /* Ensure we are PAGE_SIZE aligned */
    1707          21 :         if (in_flag & CPA_ARRAY) {
    1708             :                 int i;
    1709           0 :                 for (i = 0; i < numpages; i++) {
    1710           0 :                         if (addr[i] & ~PAGE_MASK) {
    1711           0 :                                 addr[i] &= PAGE_MASK;
    1712           0 :                                 WARN_ON_ONCE(1);
    1713             :                         }
    1714             :                 }
    1715          21 :         } else if (!(in_flag & CPA_PAGES_ARRAY)) {
    1716             :                 /*
    1717             :                  * in_flag of CPA_PAGES_ARRAY implies it is aligned.
     1718             :                  * No need to check in that case.
    1719             :                  */
    1720          21 :                 if (*addr & ~PAGE_MASK) {
    1721           0 :                         *addr &= PAGE_MASK;
    1722             :                         /*
    1723             :                          * People should not be passing in unaligned addresses:
    1724             :                          */
    1725           0 :                         WARN_ON_ONCE(1);
    1726             :                 }
    1727             :         }
    1728             : 
    1729             :         /* Must avoid aliasing mappings in the highmem code */
    1730          21 :         kmap_flush_unused();
    1731             : 
    1732          21 :         vm_unmap_aliases();
    1733             : 
    1734          21 :         cpa.vaddr = addr;
    1735          21 :         cpa.pages = pages;
    1736          21 :         cpa.numpages = numpages;
    1737          21 :         cpa.mask_set = mask_set;
    1738          21 :         cpa.mask_clr = mask_clr;
    1739          21 :         cpa.flags = 0;
    1740          21 :         cpa.curpage = 0;
    1741          21 :         cpa.force_split = force_split;
    1742             : 
    1743          21 :         if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
    1744           0 :                 cpa.flags |= in_flag;
    1745             : 
    1746             :         /* No alias checking for _NX bit modifications */
    1747          21 :         checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
    1748             :         /* Has caller explicitly disabled alias checking? */
    1749          21 :         if (in_flag & CPA_NO_CHECK_ALIAS)
    1750           0 :                 checkalias = 0;
    1751             : 
    1752          21 :         ret = __change_page_attr_set_clr(&cpa, checkalias);
    1753             : 
    1754             :         /*
    1755             :          * Check whether we really changed something:
    1756             :          */
    1757          21 :         if (!(cpa.flags & CPA_FLUSHTLB))
    1758           5 :                 goto out;
    1759             : 
    1760             :         /*
     1761             :          * No need to flush when we did not set any of the caching
    1762             :          * attributes:
    1763             :          */
    1764          16 :         cache = !!pgprot2cachemode(mask_set);
    1765             : 
    1766             :         /*
     1767             :          * On error, flush everything to be sure.
    1768             :          */
    1769          16 :         if (ret) {
    1770           0 :                 cpa_flush_all(cache);
    1771           0 :                 goto out;
    1772             :         }
    1773             : 
    1774          16 :         cpa_flush(&cpa, cache);
    1775             : out:
    1776             :         return ret;
    1777             : }
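
The alignment fixup in change_page_attr_set_clr() relies on addr & ~PAGE_MASK
being non-zero exactly when the address is not page-aligned, and on
addr &= PAGE_MASK rounding it down. A minimal model with a 4K stand-in for
PAGE_MASK (the address is made up):

#include <stdio.h>

#define MODEL_PAGE_MASK (~0xfffUL)      /* stand-in for PAGE_MASK, 4K pages */

int main(void)
{
        unsigned long addr = 0xffff888001234567UL;

        if (addr & ~MODEL_PAGE_MASK) {  /* low 12 bits set => misaligned */
                printf("misaligned %#lx -> ", addr);
                addr &= MODEL_PAGE_MASK;
        }
        printf("%#lx\n", addr);
        return 0;
}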
    1778             : 
    1779          10 : static inline int change_page_attr_set(unsigned long *addr, int numpages,
    1780             :                                        pgprot_t mask, int array)
    1781             : {
    1782          20 :         return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
    1783             :                 (array ? CPA_ARRAY : 0), NULL);
    1784             : }
    1785             : 
    1786          11 : static inline int change_page_attr_clear(unsigned long *addr, int numpages,
    1787             :                                          pgprot_t mask, int array)
    1788             : {
    1789          22 :         return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
    1790             :                 (array ? CPA_ARRAY : 0), NULL);
    1791             : }
    1792             : 
    1793           0 : static inline int cpa_set_pages_array(struct page **pages, int numpages,
    1794             :                                        pgprot_t mask)
    1795             : {
    1796           0 :         return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
    1797             :                 CPA_PAGES_ARRAY, pages);
    1798             : }
    1799             : 
    1800           0 : static inline int cpa_clear_pages_array(struct page **pages, int numpages,
    1801             :                                          pgprot_t mask)
    1802             : {
    1803           0 :         return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
    1804             :                 CPA_PAGES_ARRAY, pages);
    1805             : }
    1806             : 
    1807             : /*
    1808             :  * _set_memory_prot is an internal helper for callers that have been passed
    1809             :  * a pgprot_t value from upper layers and a reservation has already been taken.
     1810             :  * If you want to set the pgprot to a specific page protection, use the
    1811             :  * set_memory_xx() functions.
    1812             :  */
    1813           0 : int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot)
    1814             : {
    1815           0 :         return change_page_attr_set_clr(&addr, numpages, prot,
    1816           0 :                                         __pgprot(~pgprot_val(prot)), 0, 0,
    1817             :                                         NULL);
    1818             : }
    1819             : 
    1820           0 : int _set_memory_uc(unsigned long addr, int numpages)
    1821             : {
    1822             :         /*
     1823             :          * For now use UC MINUS; see the comments in ioremap().
     1824             :          * If you really need strong UC, use ioremap_uc(), but note
    1825             :          * that you cannot override IO areas with set_memory_*() as
    1826             :          * these helpers cannot work with IO memory.
    1827             :          */
    1828           0 :         return change_page_attr_set(&addr, numpages,
    1829             :                                     cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
    1830             :                                     0);
    1831             : }
    1832             : 
    1833           0 : int set_memory_uc(unsigned long addr, int numpages)
    1834             : {
    1835           0 :         int ret;
    1836             : 
    1837             :         /*
     1838             :          * For now use UC MINUS; see the comments in ioremap().
    1839             :          */
    1840           0 :         ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
    1841             :                               _PAGE_CACHE_MODE_UC_MINUS, NULL);
    1842           0 :         if (ret)
    1843           0 :                 goto out_err;
    1844             : 
    1845           0 :         ret = _set_memory_uc(addr, numpages);
    1846           0 :         if (ret)
    1847           0 :                 goto out_free;
    1848             : 
    1849             :         return 0;
    1850             : 
    1851           0 : out_free:
    1852           0 :         memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
    1853             : out_err:
    1854             :         return ret;
    1855             : }
    1856             : EXPORT_SYMBOL(set_memory_uc);
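
set_memory_uc() is a reserve/apply/rollback sequence: reserve the memtype for
the physical range, apply the attribute change, and free the reservation again
if the change fails; the two goto labels unwind exactly as much as succeeded.
A self-contained sketch of that shape, with hypothetical stubs standing in for
memtype_reserve(), _set_memory_uc() and memtype_free():

#include <stdio.h>

/* Hypothetical stand-ins, stubbed so the sketch is self-contained;
 * apply_change() fails on purpose to exercise the rollback path. */
static int reserve_range(unsigned long s, unsigned long e) { (void)s; (void)e; return 0; }
static int apply_change(unsigned long s, unsigned long e)  { (void)s; (void)e; return -1; }
static void release_range(unsigned long s, unsigned long e)
{
        printf("rolled back [%#lx, %#lx)\n", s, e);
}

static int change_with_rollback(unsigned long start, unsigned long end)
{
        int ret;

        ret = reserve_range(start, end);
        if (ret)
                goto out_err;           /* nothing to undo yet */

        ret = apply_change(start, end);
        if (ret)
                goto out_free;          /* undo the reservation only */

        return 0;
out_free:
        release_range(start, end);
out_err:
        return ret;
}

int main(void)
{
        change_with_rollback(0x100000, 0x102000);
        return 0;
}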
    1857             : 
    1858           0 : int _set_memory_wc(unsigned long addr, int numpages)
    1859             : {
    1860           0 :         int ret;
    1861             : 
    1862           0 :         ret = change_page_attr_set(&addr, numpages,
    1863             :                                    cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
    1864             :                                    0);
    1865           0 :         if (!ret) {
    1866           0 :                 ret = change_page_attr_set_clr(&addr, numpages,
    1867             :                                                cachemode2pgprot(_PAGE_CACHE_MODE_WC),
    1868           0 :                                                __pgprot(_PAGE_CACHE_MASK),
    1869             :                                                0, 0, NULL);
    1870             :         }
    1871           0 :         return ret;
    1872             : }
    1873             : 
    1874           0 : int set_memory_wc(unsigned long addr, int numpages)
    1875             : {
    1876           0 :         int ret;
    1877             : 
    1878           0 :         ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
    1879             :                 _PAGE_CACHE_MODE_WC, NULL);
    1880           0 :         if (ret)
    1881             :                 return ret;
    1882             : 
    1883           0 :         ret = _set_memory_wc(addr, numpages);
    1884           0 :         if (ret)
    1885           0 :                 memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
    1886             : 
    1887             :         return ret;
    1888             : }
    1889             : EXPORT_SYMBOL(set_memory_wc);
    1890             : 
    1891           0 : int _set_memory_wt(unsigned long addr, int numpages)
    1892             : {
    1893           0 :         return change_page_attr_set(&addr, numpages,
    1894             :                                     cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
    1895             : }
    1896             : 
    1897           0 : int _set_memory_wb(unsigned long addr, int numpages)
    1898             : {
    1899             :         /* WB cache mode is hard wired to all cache attribute bits being 0 */
    1900           0 :         return change_page_attr_clear(&addr, numpages,
    1901           0 :                                       __pgprot(_PAGE_CACHE_MASK), 0);
    1902             : }
    1903             : 
    1904           0 : int set_memory_wb(unsigned long addr, int numpages)
    1905             : {
    1906           0 :         int ret;
    1907             : 
    1908           0 :         ret = _set_memory_wb(addr, numpages);
    1909           0 :         if (ret)
    1910             :                 return ret;
    1911             : 
    1912           0 :         memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
    1913           0 :         return 0;
    1914             : }
    1915             : EXPORT_SYMBOL(set_memory_wb);
    1916             : 
    1917           1 : int set_memory_x(unsigned long addr, int numpages)
    1918             : {
    1919           1 :         if (!(__supported_pte_mask & _PAGE_NX))
    1920             :                 return 0;
    1921             : 
    1922           1 :         return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
    1923             : }
    1924             : 
    1925           6 : int set_memory_nx(unsigned long addr, int numpages)
    1926             : {
    1927           6 :         if (!(__supported_pte_mask & _PAGE_NX))
    1928             :                 return 0;
    1929             : 
    1930           6 :         return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
    1931             : }
    1932             : 
    1933          10 : int set_memory_ro(unsigned long addr, int numpages)
    1934             : {
    1935          10 :         return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
    1936             : }
    1937             : 
    1938           4 : int set_memory_rw(unsigned long addr, int numpages)
    1939             : {
    1940           4 :         return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
    1941             : }
    1942             : 
    1943           0 : int set_memory_np(unsigned long addr, int numpages)
    1944             : {
    1945           0 :         return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
    1946             : }
    1947             : 
    1948           0 : int set_memory_np_noalias(unsigned long addr, int numpages)
    1949             : {
    1950           0 :         int cpa_flags = CPA_NO_CHECK_ALIAS;
    1951             : 
    1952           0 :         return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
    1953           0 :                                         __pgprot(_PAGE_PRESENT), 0,
    1954             :                                         cpa_flags, NULL);
    1955             : }
    1956             : 
    1957           0 : int set_memory_4k(unsigned long addr, int numpages)
    1958             : {
    1959           0 :         return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
    1960           0 :                                         __pgprot(0), 1, 0, NULL);
    1961             : }
    1962             : 
    1963           0 : int set_memory_nonglobal(unsigned long addr, int numpages)
    1964             : {
    1965           0 :         return change_page_attr_clear(&addr, numpages,
    1966           0 :                                       __pgprot(_PAGE_GLOBAL), 0);
    1967             : }
    1968             : 
    1969           0 : int set_memory_global(unsigned long addr, int numpages)
    1970             : {
    1971           0 :         return change_page_attr_set(&addr, numpages,
    1972           0 :                                     __pgprot(_PAGE_GLOBAL), 0);
    1973             : }
    1974             : 
    1975           0 : static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
    1976             : {
    1977           0 :         struct cpa_data cpa;
    1978           0 :         int ret;
    1979             : 
    1980             :         /* Nothing to do if memory encryption is not active */
    1981           0 :         if (!mem_encrypt_active())
    1982           0 :                 return 0;
    1983             : 
    1984             :         /* Should not be working on unaligned addresses */
    1985             :         if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
    1986             :                 addr &= PAGE_MASK;
    1987             : 
    1988             :         memset(&cpa, 0, sizeof(cpa));
    1989             :         cpa.vaddr = &addr;
    1990             :         cpa.numpages = numpages;
    1991             :         cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
    1992             :         cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
    1993             :         cpa.pgd = init_mm.pgd;
    1994             : 
    1995             :         /* Must avoid aliasing mappings in the highmem code */
    1996             :         kmap_flush_unused();
    1997             :         vm_unmap_aliases();
    1998             : 
    1999             :         /*
    2000             :          * Before changing the encryption attribute, we need to flush caches.
    2001             :          */
    2002             :         cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
    2003             : 
    2004             :         ret = __change_page_attr_set_clr(&cpa, 1);
    2005             : 
    2006             :         /*
    2007             :          * After changing the encryption attribute, we need to flush TLBs again
    2008             :          * in case any speculative TLB caching occurred (but no need to flush
    2009             :          * caches again).  We could just use cpa_flush_all(), but in case TLB
    2010             :          * flushing gets optimized in the cpa_flush() path use the same logic
    2011             :          * as above.
    2012             :          */
    2013             :         cpa_flush(&cpa, 0);
    2014             : 
    2015             :         return ret;
    2016             : }
    2017             : 
    2018           0 : int set_memory_encrypted(unsigned long addr, int numpages)
    2019             : {
    2020           0 :         return __set_memory_enc_dec(addr, numpages, true);
    2021             : }
    2022             : EXPORT_SYMBOL_GPL(set_memory_encrypted);
    2023             : 
    2024           0 : int set_memory_decrypted(unsigned long addr, int numpages)
    2025             : {
    2026           0 :         return __set_memory_enc_dec(addr, numpages, false);
    2027             : }
    2028             : EXPORT_SYMBOL_GPL(set_memory_decrypted);
    2029             : 
    2030           0 : int set_pages_uc(struct page *page, int numpages)
    2031             : {
    2032           0 :         unsigned long addr = (unsigned long)page_address(page);
    2033             : 
    2034           0 :         return set_memory_uc(addr, numpages);
    2035             : }
    2036             : EXPORT_SYMBOL(set_pages_uc);
    2037             : 
    2038           0 : static int _set_pages_array(struct page **pages, int numpages,
    2039             :                 enum page_cache_mode new_type)
    2040             : {
    2041           0 :         unsigned long start;
    2042           0 :         unsigned long end;
    2043           0 :         enum page_cache_mode set_type;
    2044           0 :         int i;
    2045           0 :         int free_idx;
    2046           0 :         int ret;
    2047             : 
    2048           0 :         for (i = 0; i < numpages; i++) {
    2049           0 :                 if (PageHighMem(pages[i]))
    2050             :                         continue;
    2051           0 :                 start = page_to_pfn(pages[i]) << PAGE_SHIFT;
    2052           0 :                 end = start + PAGE_SIZE;
    2053           0 :                 if (memtype_reserve(start, end, new_type, NULL))
    2054           0 :                         goto err_out;
    2055             :         }
    2056             : 
    2057             :         /* If WC, set to UC- first and then WC */
    2058           0 :         set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
    2059           0 :                                 _PAGE_CACHE_MODE_UC_MINUS : new_type;
    2060             : 
    2061           0 :         ret = cpa_set_pages_array(pages, numpages,
    2062             :                                   cachemode2pgprot(set_type));
    2063           0 :         if (!ret && new_type == _PAGE_CACHE_MODE_WC)
    2064           0 :                 ret = change_page_attr_set_clr(NULL, numpages,
    2065             :                                                cachemode2pgprot(
    2066             :                                                 _PAGE_CACHE_MODE_WC),
    2067           0 :                                                __pgprot(_PAGE_CACHE_MASK),
    2068             :                                                0, CPA_PAGES_ARRAY, pages);
    2069           0 :         if (ret)
    2070           0 :                 goto err_out;
    2071             :         return 0; /* Success */
    2072           0 : err_out:
    2073           0 :         free_idx = i;
    2074           0 :         for (i = 0; i < free_idx; i++) {
    2075           0 :                 if (PageHighMem(pages[i]))
    2076             :                         continue;
    2077           0 :                 start = page_to_pfn(pages[i]) << PAGE_SHIFT;
    2078           0 :                 end = start + PAGE_SIZE;
    2079           0 :                 memtype_free(start, end);
    2080             :         }
    2081             :         return -EINVAL;
    2082             : }
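
_set_pages_array() records how far the reservation loop got (free_idx = i)
and, on failure, releases only the entries it actually reserved. A compact
model of that partial-unwind idiom, with invented reserve()/unreserve()
helpers and a failure forced on the fourth entry:

#include <stdio.h>

#define N 5

static int reserve(int i)    { return i == 3 ? -1 : 0; }  /* fail on entry 3 */
static void unreserve(int i) { printf("freed %d\n", i); }

int main(void)
{
        int i, free_idx;

        for (i = 0; i < N; i++)
                if (reserve(i))
                        goto err_out;
        return 0;

err_out:
        free_idx = i;                   /* entries [0, free_idx) were reserved */
        for (i = 0; i < free_idx; i++)
                unreserve(i);
        return 1;
}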
    2083             : 
    2084           0 : int set_pages_array_uc(struct page **pages, int numpages)
    2085             : {
    2086           0 :         return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS);
    2087             : }
    2088             : EXPORT_SYMBOL(set_pages_array_uc);
    2089             : 
    2090           0 : int set_pages_array_wc(struct page **pages, int numpages)
    2091             : {
    2092           0 :         return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC);
    2093             : }
    2094             : EXPORT_SYMBOL(set_pages_array_wc);
    2095             : 
    2096           0 : int set_pages_array_wt(struct page **pages, int numpages)
    2097             : {
    2098           0 :         return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
    2099             : }
    2100             : EXPORT_SYMBOL_GPL(set_pages_array_wt);
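_set_pages_array() follows a reserve-then-commit pattern: it reserves a memtype for each low-memory page, performs one CPA pass (for WC it goes through UC- first, then flips to WC), and on any failure frees exactly the memtypes reserved so far. A hedged caller sketch (hypothetical name) using the wrappers above together with set_pages_array_wb() below:

	/* Hypothetical sketch: write-combine an array of buffer pages, then
	 * return them to write-back; on failure nothing is left reserved. */
	static int example_wc_buffer(struct page **pages, int numpages)
	{
		int ret = set_pages_array_wc(pages, numpages);

		if (ret)
			return ret;
		/* ... stream data; stores are combined rather than cached ... */
		return set_pages_array_wb(pages, numpages);
	}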
    2101             : 
    2102           0 : int set_pages_wb(struct page *page, int numpages)
    2103             : {
    2104           0 :         unsigned long addr = (unsigned long)page_address(page);
    2105             : 
    2106           0 :         return set_memory_wb(addr, numpages);
    2107             : }
    2108             : EXPORT_SYMBOL(set_pages_wb);
    2109             : 
    2110           0 : int set_pages_array_wb(struct page **pages, int numpages)
    2111             : {
    2112           0 :         int retval;
    2113           0 :         unsigned long start;
    2114           0 :         unsigned long end;
    2115           0 :         int i;
    2116             : 
    2117             :         /* WB cache mode is hard wired to all cache attribute bits being 0 */
    2118           0 :         retval = cpa_clear_pages_array(pages, numpages,
    2119           0 :                         __pgprot(_PAGE_CACHE_MASK));
    2120           0 :         if (retval)
    2121             :                 return retval;
    2122             : 
    2123           0 :         for (i = 0; i < numpages; i++) {
    2124           0 :                 if (PageHighMem(pages[i]))
    2125             :                         continue;
    2126           0 :                 start = page_to_pfn(pages[i]) << PAGE_SHIFT;
    2127           0 :                 end = start + PAGE_SIZE;
    2128           0 :                 memtype_free(start, end);
    2129             :         }
    2130             : 
    2131             :         return 0;
    2132             : }
    2133             : EXPORT_SYMBOL(set_pages_array_wb);
    2134             : 
    2135           0 : int set_pages_ro(struct page *page, int numpages)
    2136             : {
    2137           0 :         unsigned long addr = (unsigned long)page_address(page);
    2138             : 
    2139           0 :         return set_memory_ro(addr, numpages);
    2140             : }
    2141             : 
    2142           0 : int set_pages_rw(struct page *page, int numpages)
    2143             : {
    2144           0 :         unsigned long addr = (unsigned long)page_address(page);
    2145             : 
    2146           0 :         return set_memory_rw(addr, numpages);
    2147             : }
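A hedged sketch (hypothetical names) pairing set_pages_ro()/set_pages_rw(): keeping a page write-protected except during controlled updates, so stray writes fault immediately.

	/* Hypothetical sketch: open a short writable window on a protected page. */
	static int example_update_protected(struct page *page, void (*update)(void *))
	{
		int ret = set_pages_rw(page, 1);

		if (ret)
			return ret;
		update(page_address(page));	/* mutate while writable */
		return set_pages_ro(page, 1);	/* re-arm write protection */
	}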
    2148             : 
    2149           0 : static int __set_pages_p(struct page *page, int numpages)
    2150             : {
    2151           0 :         unsigned long tempaddr = (unsigned long) page_address(page);
    2152           0 :         struct cpa_data cpa = { .vaddr = &tempaddr,
    2153             :                                 .pgd = NULL,
    2154             :                                 .numpages = numpages,
    2155             :                                 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
    2156             :                                 .mask_clr = __pgprot(0),
    2157             :                                 .flags = 0};
    2158             : 
    2159             :         /*
    2160             :          * No alias checking is needed for setting the present flag;
    2161             :          * otherwise, we may need to break large pages for 64-bit kernel
    2162             :          * text mappings (this adds complexity, especially if we want to
    2163             :          * do it from atomic context). Let's keep it simple!
    2164             :          */
    2165           0 :         return __change_page_attr_set_clr(&cpa, 0);
    2166             : }
    2167             : 
    2168           0 : static int __set_pages_np(struct page *page, int numpages)
    2169             : {
    2170           0 :         unsigned long tempaddr = (unsigned long) page_address(page);
    2171           0 :         struct cpa_data cpa = { .vaddr = &tempaddr,
    2172             :                                 .pgd = NULL,
    2173             :                                 .numpages = numpages,
    2174             :                                 .mask_set = __pgprot(0),
    2175             :                                 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
    2176             :                                 .flags = 0};
    2177             : 
    2178             :         /*
    2179             :          * No alias checking is needed for clearing the present flag;
    2180             :          * otherwise, we may need to break large pages for 64-bit kernel
    2181             :          * text mappings (this adds complexity, especially if we want to
    2182             :          * do it from atomic context). Let's keep it simple!
    2183             :          */
    2184           0 :         return __change_page_attr_set_clr(&cpa, 0);
    2185             : }
    2186             : 
    2187           0 : int set_direct_map_invalid_noflush(struct page *page)
    2188             : {
    2189           0 :         return __set_pages_np(page, 1);
    2190             : }
    2191             : 
    2192           0 : int set_direct_map_default_noflush(struct page *page)
    2193             : {
    2194           0 :         return __set_pages_p(page, 1);
    2195             : }
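The *_noflush variants deliberately leave TLB invalidation to the caller, which may batch a single flush over many pages. A hedged single-page sketch (hypothetical name; flush_tlb_kernel_range() is the generic kernel-range flush):

	/* Hypothetical sketch: drop a page from the direct map, later restore
	 * it; the caller owns the TLB flush after each transition. */
	static int example_hide_page(struct page *page)
	{
		unsigned long addr = (unsigned long)page_address(page);
		int ret = set_direct_map_invalid_noflush(page);

		if (ret)
			return ret;
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
		/* ... direct-map accesses to this page now fault ... */
		ret = set_direct_map_default_noflush(page);
		if (!ret)
			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
		return ret;
	}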
    2196             : 
    2197             : #ifdef CONFIG_DEBUG_PAGEALLOC
    2198             : void __kernel_map_pages(struct page *page, int numpages, int enable)
    2199             : {
    2200             :         if (PageHighMem(page))
    2201             :                 return;
    2202             :         if (!enable) {
    2203             :                 debug_check_no_locks_freed(page_address(page),
    2204             :                                            numpages * PAGE_SIZE);
    2205             :         }
    2206             : 
    2207             :         /*
    2208             :          * The return value is ignored as the calls cannot fail.
    2209             :          * With DEBUG_PAGEALLOC, large pages are not used for identity
    2210             :          * mappings, so splitting a large page never allocates memory here.
    2211             :          */
    2212             :         if (enable)
    2213             :                 __set_pages_p(page, numpages);
    2214             :         else
    2215             :                 __set_pages_np(page, numpages);
    2216             : 
    2217             :         /*
    2218             :          * Ideally we would send an IPI and flush all TLBs, but that
    2219             :          * could deadlock here, so flush only the current CPU's TLB.
    2220             :          * Preemption needs to be disabled around __flush_tlb_all() due to
    2221             :          * CR3 reload in __native_flush_tlb().
    2222             :          */
    2223             :         preempt_disable();
    2224             :         __flush_tlb_all();
    2225             :         preempt_enable();
    2226             : 
    2227             :         arch_flush_lazy_mmu_mode();
    2228             : }
    2229             : #endif /* CONFIG_DEBUG_PAGEALLOC */
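To make the mechanism concrete, a hedged sketch of the DEBUG_PAGEALLOC pattern (the example_* helpers are hypothetical stand-ins for the page allocator's call sites): pages are unmapped when freed so any later touch faults immediately, and mapped again on allocation.

	/* Hypothetical stand-ins for the allocator hooks around
	 * __kernel_map_pages(). */
	static void example_debug_on_free(struct page *page, int numpages)
	{
		__kernel_map_pages(page, numpages, 0);	/* clear P and RW */
	}

	static void example_debug_on_alloc(struct page *page, int numpages)
	{
		__kernel_map_pages(page, numpages, 1);	/* set P and RW */
	}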
    2230             : 
    2231           0 : bool kernel_page_present(struct page *page)
    2232             : {
    2233           0 :         unsigned int level;
    2234           0 :         pte_t *pte;
    2235             : 
    2236           0 :         if (PageHighMem(page))
    2237             :                 return false;
    2238             : 
    2239           0 :         pte = lookup_address((unsigned long)page_address(page), &level);
    2240           0 :         return (pte_val(*pte) & _PAGE_PRESENT);
    2241             : }
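kernel_page_present() lets callers (hibernation, for example) check that a page is reachable through its kernel mapping before reading it. A hedged sketch (hypothetical name):

	/* Hypothetical sketch: skip pages whose kernel mapping is absent. */
	static void example_save_page(void *dst, struct page *page)
	{
		if (!kernel_page_present(page))
			return;		/* unmapped: reading would fault */
		memcpy(dst, page_address(page), PAGE_SIZE);
	}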
    2242             : 
    2243           0 : int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
    2244             :                                    unsigned numpages, unsigned long page_flags)
    2245             : {
    2246           0 :         int retval = -EINVAL;
    2247             : 
    2248           0 :         struct cpa_data cpa = {
    2249             :                 .vaddr = &address,
    2250             :                 .pfn = pfn,
    2251             :                 .pgd = pgd,
    2252             :                 .numpages = numpages,
    2253             :                 .mask_set = __pgprot(0),
    2254           0 :                 .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
    2255             :                 .flags = 0,
    2256             :         };
    2257             : 
    2258           0 :         WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
    2259             : 
    2260           0 :         if (!(__supported_pte_mask & _PAGE_NX))
    2261           0 :                 goto out;
    2262             : 
    2263           0 :         if (!(page_flags & _PAGE_ENC))
    2264           0 :                 cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
    2265             : 
    2266           0 :         cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
    2267             : 
    2268           0 :         retval = __change_page_attr_set_clr(&cpa, 0);
    2269           0 :         __flush_tlb_all();
    2270             : 
    2271           0 : out:
    2272           0 :         return retval;
    2273             : }
    2274             : 
    2275             : /*
    2276             :  * __flush_tlb_all() flushes mappings only on the current CPU, so this
    2277             :  * function must not be used in an SMP environment. Presently, it's used only
    2278             :  * during boot (way before smp_init()) by the EFI subsystem and hence is OK.
    2279             :  */
    2280           0 : int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
    2281             :                                      unsigned long numpages)
    2282             : {
    2283           0 :         int retval;
    2284             : 
    2285             :         /*
    2286             :          * The typical sequence for unmapping is to find a pte through
    2287             :          * lookup_address_in_pgd() (ideally, it should never return NULL because
    2288             :          * the address is already mapped) and change its protections. As pfn is
    2289             :          * the *target* of a mapping, it's not useful while unmapping.
    2290             :          */
    2291           0 :         struct cpa_data cpa = {
    2292             :                 .vaddr          = &address,
    2293             :                 .pfn            = 0,
    2294             :                 .pgd            = pgd,
    2295             :                 .numpages       = numpages,
    2296             :                 .mask_set       = __pgprot(0),
    2297             :                 .mask_clr       = __pgprot(_PAGE_PRESENT | _PAGE_RW),
    2298             :                 .flags          = 0,
    2299             :         };
    2300             : 
    2301           0 :         WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
    2302             : 
    2303           0 :         retval = __change_page_attr_set_clr(&cpa, 0);
    2304           0 :         __flush_tlb_all();
    2305             : 
    2306           0 :         return retval;
    2307             : }
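Both *_in_pgd() helpers are boot-time, single-CPU tools. A hedged EFI-style sketch (hypothetical name): with page_flags == 0, mask_clr covers both _PAGE_NX and _PAGE_RW, so the result is a read-only, executable mapping.

	/* Hypothetical boot-time sketch: map firmware code RX into a private
	 * pgd, call through it, then tear the mapping down. */
	static int __init example_map_firmware(pgd_t *efi_pgd, u64 pfn,
					       unsigned long va,
					       unsigned long npages)
	{
		int ret = kernel_map_pages_in_pgd(efi_pgd, pfn, va, npages, 0);

		if (ret)
			return ret;
		/* ... switch CR3 to efi_pgd and invoke firmware ... */
		return kernel_unmap_pages_in_pgd(efi_pgd, va, npages);
	}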
    2308             : 
    2309             : /*
    2310             :  * The testcases use internal knowledge of the implementation that shouldn't
    2311             :  * be exposed to the rest of the kernel. Include these directly here.
    2312             :  */
    2313             : #ifdef CONFIG_CPA_DEBUG
    2314             : #include "cpa-test.c"
    2315             : #endif

Generated by: LCOV version 1.14