Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * Copyright 2002 Andi Kleen, SuSE Labs.
4 : * Thanks to Ben LaHaise for precious feedback.
5 : */
6 : #include <linux/highmem.h>
7 : #include <linux/memblock.h>
8 : #include <linux/sched.h>
9 : #include <linux/mm.h>
10 : #include <linux/interrupt.h>
11 : #include <linux/seq_file.h>
12 : #include <linux/debugfs.h>
13 : #include <linux/pfn.h>
14 : #include <linux/percpu.h>
15 : #include <linux/gfp.h>
16 : #include <linux/pci.h>
17 : #include <linux/vmalloc.h>
18 : #include <linux/libnvdimm.h>
19 :
20 : #include <asm/e820/api.h>
21 : #include <asm/processor.h>
22 : #include <asm/tlbflush.h>
23 : #include <asm/sections.h>
24 : #include <asm/setup.h>
25 : #include <linux/uaccess.h>
26 : #include <asm/pgalloc.h>
27 : #include <asm/proto.h>
28 : #include <asm/memtype.h>
29 : #include <asm/set_memory.h>
30 :
31 : #include "../mm_internal.h"
32 :
33 : /*
34 : * The current flushing context - we pass it instead of 5 arguments:
35 : */
36 : struct cpa_data {
37 : unsigned long *vaddr;
38 : pgd_t *pgd;
39 : pgprot_t mask_set;
40 : pgprot_t mask_clr;
41 : unsigned long numpages;
42 : unsigned long curpage;
43 : unsigned long pfn;
44 : unsigned int flags;
45 : unsigned int force_split : 1,
46 : force_static_prot : 1,
47 : force_flush_all : 1;
48 : struct page **pages;
49 : };
50 :
51 : enum cpa_warn {
52 : CPA_CONFLICT,
53 : CPA_PROTECT,
54 : CPA_DETECT,
55 : };
56 :
57 : static const int cpa_warn_level = CPA_PROTECT;
58 :
59 : /*
60 : * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings)
61 : * using cpa_lock, so that no other CPU with stale large TLB entries can
62 : * change the page attributes in parallel with another CPU splitting a
63 : * large page entry and changing its attributes.
64 : */
65 : static DEFINE_SPINLOCK(cpa_lock);
66 :
67 : #define CPA_FLUSHTLB 1
68 : #define CPA_ARRAY 2
69 : #define CPA_PAGES_ARRAY 4
70 : #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
71 :
72 0 : static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
73 : {
74 0 : return __pgprot(cachemode2protval(pcm));
75 : }
76 :
77 : #ifdef CONFIG_PROC_FS
78 : static unsigned long direct_pages_count[PG_LEVEL_NUM];
79 :
80 12 : void update_page_count(int level, unsigned long pages)
81 : {
82 : /* Protect against CPA */
83 12 : spin_lock(&pgd_lock);
84 12 : direct_pages_count[level] += pages;
85 12 : spin_unlock(&pgd_lock);
86 12 : }
87 :
88 14 : static void split_page_count(int level)
89 : {
90 14 : if (direct_pages_count[level] == 0)
91 : return;
92 :
93 14 : direct_pages_count[level]--;
94 14 : direct_pages_count[level - 1] += PTRS_PER_PTE;
95 : }
96 :
97 1 : void arch_report_meminfo(struct seq_file *m)
98 : {
99 1 : seq_printf(m, "DirectMap4k: %8lu kB\n",
100 1 : direct_pages_count[PG_LEVEL_4K] << 2);
101 : #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
102 1 : seq_printf(m, "DirectMap2M: %8lu kB\n",
103 1 : direct_pages_count[PG_LEVEL_2M] << 11);
104 : #else
105 : seq_printf(m, "DirectMap4M: %8lu kB\n",
106 : direct_pages_count[PG_LEVEL_2M] << 12);
107 : #endif
108 1 : if (direct_gbpages)
109 1 : seq_printf(m, "DirectMap1G: %8lu kB\n",
110 1 : direct_pages_count[PG_LEVEL_1G] << 20);
111 1 : }
112 : #else
113 : static inline void split_page_count(int level) { }
114 : #endif
115 :
116 : #ifdef CONFIG_X86_CPA_STATISTICS
117 :
118 : static unsigned long cpa_1g_checked;
119 : static unsigned long cpa_1g_sameprot;
120 : static unsigned long cpa_1g_preserved;
121 : static unsigned long cpa_2m_checked;
122 : static unsigned long cpa_2m_sameprot;
123 : static unsigned long cpa_2m_preserved;
124 : static unsigned long cpa_4k_install;
125 :
126 : static inline void cpa_inc_1g_checked(void)
127 : {
128 : cpa_1g_checked++;
129 : }
130 :
131 : static inline void cpa_inc_2m_checked(void)
132 : {
133 : cpa_2m_checked++;
134 : }
135 :
136 : static inline void cpa_inc_4k_install(void)
137 : {
138 : data_race(cpa_4k_install++);
139 : }
140 :
141 : static inline void cpa_inc_lp_sameprot(int level)
142 : {
143 : if (level == PG_LEVEL_1G)
144 : cpa_1g_sameprot++;
145 : else
146 : cpa_2m_sameprot++;
147 : }
148 :
149 : static inline void cpa_inc_lp_preserved(int level)
150 : {
151 : if (level == PG_LEVEL_1G)
152 : cpa_1g_preserved++;
153 : else
154 : cpa_2m_preserved++;
155 : }
156 :
157 : static int cpastats_show(struct seq_file *m, void *p)
158 : {
159 : seq_printf(m, "1G pages checked: %16lu\n", cpa_1g_checked);
160 : seq_printf(m, "1G pages sameprot: %16lu\n", cpa_1g_sameprot);
161 : seq_printf(m, "1G pages preserved: %16lu\n", cpa_1g_preserved);
162 : seq_printf(m, "2M pages checked: %16lu\n", cpa_2m_checked);
163 : seq_printf(m, "2M pages sameprot: %16lu\n", cpa_2m_sameprot);
164 : seq_printf(m, "2M pages preserved: %16lu\n", cpa_2m_preserved);
165 : seq_printf(m, "4K pages set-checked: %16lu\n", cpa_4k_install);
166 : return 0;
167 : }
168 :
169 : static int cpastats_open(struct inode *inode, struct file *file)
170 : {
171 : return single_open(file, cpastats_show, NULL);
172 : }
173 :
174 : static const struct file_operations cpastats_fops = {
175 : .open = cpastats_open,
176 : .read = seq_read,
177 : .llseek = seq_lseek,
178 : .release = single_release,
179 : };
180 :
181 : static int __init cpa_stats_init(void)
182 : {
183 : debugfs_create_file("cpa_stats", S_IRUSR, arch_debugfs_dir, NULL,
184 : &cpastats_fops);
185 : return 0;
186 : }
187 : late_initcall(cpa_stats_init);
188 : #else
189 0 : static inline void cpa_inc_1g_checked(void) { }
190 153 : static inline void cpa_inc_2m_checked(void) { }
191 4234 : static inline void cpa_inc_4k_install(void) { }
192 : static inline void cpa_inc_lp_sameprot(int level) { }
193 50 : static inline void cpa_inc_lp_preserved(int level) { }
194 : #endif
195 :
196 :
197 : static inline int
198 1976 : within(unsigned long addr, unsigned long start, unsigned long end)
199 : {
200 1976 : return addr >= start && addr < end;
201 : }
202 :
203 : static inline int
204 9 : within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
205 : {
206 9 : return addr >= start && addr <= end;
207 : }
208 :
209 : #ifdef CONFIG_X86_64
210 :
211 9 : static inline unsigned long highmap_start_pfn(void)
212 : {
213 9 : return __pa_symbol(_text) >> PAGE_SHIFT;
214 : }
215 :
216 9 : static inline unsigned long highmap_end_pfn(void)
217 : {
218 : /* Do not reference physical address outside the kernel. */
219 9 : return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
220 : }
221 :
222 9 : static bool __cpa_pfn_in_highmap(unsigned long pfn)
223 : {
224 : /*
225 : * Kernel text has an alias mapping at a high address, known
226 : * here as "highmap".
227 : */
228 9 : return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
229 : }
230 :
231 : #else
232 :
233 : static bool __cpa_pfn_in_highmap(unsigned long pfn)
234 : {
235 : /* There is no highmap on 32-bit */
236 : return false;
237 : }
238 :
239 : #endif
240 :
241 : /*
242 : * See set_mce_nospec().
243 : *
244 : * Machine check recovery code needs to change cache mode of poisoned pages to
245 : * UC to avoid speculative access logging another error. But passing the
246 : * address of the 1:1 mapping to set_memory_uc() is a fine way to encourage a
247 : * speculative access. So we cheat and flip the top bit of the address. This
248 : * works fine for the code that updates the page tables. But at the end of the
249 : * process we need to flush the TLB and cache and the non-canonical address
250 : * causes a #GP fault when used by the INVLPG and CLFLUSH instructions.
251 : *
252 : * But in the common case we already have a canonical address. This code
253 : * will fix the top bit if needed and is a no-op otherwise.
254 : */
255 7 : static inline unsigned long fix_addr(unsigned long addr)
256 : {
257 : #ifdef CONFIG_X86_64
258 7 : return (long)(addr << 1) >> 1;
259 : #else
260 : return addr;
261 : #endif
262 : }
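Editor's note: a minimal standalone sketch (not part of this file) that works the sign-extension trick above through a made-up direct-map style address. It assumes a 64-bit long and an arithmetic right shift, as the kernel code itself relies on for x86_64.

#include <stdio.h>

int main(void)
{
	/* Hypothetical canonical direct-map style address (bit 63 set). */
	unsigned long canonical = 0xffff888012345000UL;
	/* set_mce_nospec() style decoy: flip the top bit -> non-canonical. */
	unsigned long decoy = canonical ^ (1UL << 63);
	/* Same operation as fix_addr(): drop bit 63, sign-extend from bit 62. */
	unsigned long fixed = (unsigned long)((long)(decoy << 1) >> 1);

	printf("decoy 0x%lx -> fixed 0x%lx (canonical 0x%lx)\n",
	       decoy, fixed, canonical);
	return fixed == canonical ? 0 : 1;
}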
263 :
264 5368 : static unsigned long __cpa_addr(struct cpa_data *cpa, unsigned long idx)
265 : {
266 5368 : if (cpa->flags & CPA_PAGES_ARRAY) {
267 0 : struct page *page = cpa->pages[idx];
268 :
269 0 : if (unlikely(PageHighMem(page)))
270 : return 0;
271 :
272 0 : return (unsigned long)page_address(page);
273 : }
274 :
275 5368 : if (cpa->flags & CPA_ARRAY)
276 0 : return cpa->vaddr[idx];
277 :
278 5368 : return *cpa->vaddr + idx * PAGE_SIZE;
279 : }
280 :
281 : /*
282 : * Flushing functions
283 : */
284 :
285 0 : static void clflush_cache_range_opt(void *vaddr, unsigned int size)
286 : {
287 0 : const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
288 0 : void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
289 0 : void *vend = vaddr + size;
290 :
291 0 : if (p >= vend)
292 : return;
293 :
294 0 : for (; p < vend; p += clflush_size)
295 0 : clflushopt(p);
296 : }
297 :
298 : /**
299 : * clflush_cache_range - flush a cache range with clflush
300 : * @vaddr: virtual start address
301 : * @size: number of bytes to flush
302 : *
303 : * CLFLUSHOPT is an unordered instruction which needs fencing with MFENCE or
304 : * SFENCE to avoid ordering issues.
305 : */
306 0 : void clflush_cache_range(void *vaddr, unsigned int size)
307 : {
308 0 : mb();
309 0 : clflush_cache_range_opt(vaddr, size);
310 0 : mb();
311 0 : }
312 : EXPORT_SYMBOL_GPL(clflush_cache_range);
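Editor's note: a hedged usage sketch of clflush_cache_range(); the buffer, source and length names are hypothetical, and memcpy() is assumed to be available via the usual kernel headers. After the CPU stores to memory that a device or persistent medium will observe, the dirty lines are pushed out of the cache hierarchy; the required fencing happens inside the helper.

static void publish_buffer(void *shared_buf, const void *src, unsigned int len)
{
	memcpy(shared_buf, src, len);		/* fill the shared buffer */
	clflush_cache_range(shared_buf, len);	/* fenced internally with mb() */
}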
313 :
314 : #ifdef CONFIG_ARCH_HAS_PMEM_API
315 0 : void arch_invalidate_pmem(void *addr, size_t size)
316 : {
317 0 : clflush_cache_range(addr, size);
318 0 : }
319 : EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
320 : #endif
321 :
322 0 : static void __cpa_flush_all(void *arg)
323 : {
324 0 : unsigned long cache = (unsigned long)arg;
325 :
326 : /*
327 : * Flush all to work around errata in early Athlons regarding
328 : * large page flushing.
329 : */
330 0 : __flush_tlb_all();
331 :
332 0 : if (cache && boot_cpu_data.x86 >= 4)
333 0 : wbinvd();
334 0 : }
335 :
336 0 : static void cpa_flush_all(unsigned long cache)
337 : {
338 0 : BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
339 :
340 0 : on_each_cpu(__cpa_flush_all, (void *) cache, 1);
341 0 : }
342 :
343 3 : static void __cpa_flush_tlb(void *data)
344 : {
345 3 : struct cpa_data *cpa = data;
346 3 : unsigned int i;
347 :
348 10 : for (i = 0; i < cpa->numpages; i++)
349 7 : flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
350 3 : }
351 :
352 16 : static void cpa_flush(struct cpa_data *data, int cache)
353 : {
354 16 : struct cpa_data *cpa = data;
355 16 : unsigned int i;
356 :
357 16 : BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
358 :
359 16 : if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
360 0 : cpa_flush_all(cache);
361 0 : return;
362 : }
363 :
364 16 : if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
365 13 : flush_tlb_all();
366 : else
367 3 : on_each_cpu(__cpa_flush_tlb, cpa, 1);
368 :
369 16 : if (!cache)
370 : return;
371 :
372 0 : mb();
373 0 : for (i = 0; i < cpa->numpages; i++) {
374 0 : unsigned long addr = __cpa_addr(cpa, i);
375 0 : unsigned int level;
376 :
377 0 : pte_t *pte = lookup_address(addr, &level);
378 :
379 : /*
380 : * Only flush present addresses:
381 : */
382 0 : if (pte && (pte_val(*pte) & _PAGE_PRESENT))
383 0 : clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
384 : }
385 0 : mb();
386 : }
387 :
388 11153 : static bool overlaps(unsigned long r1_start, unsigned long r1_end,
389 : unsigned long r2_start, unsigned long r2_end)
390 : {
391 11153 : return (r1_start <= r2_end && r1_end >= r2_start) ||
392 : (r2_start <= r1_end && r2_end >= r1_start);
393 : }
394 :
395 : #ifdef CONFIG_PCI_BIOS
396 : /*
397 : * The BIOS area between 640k and 1Mb needs to be executable for PCI BIOS
398 : * based config access (CONFIG_PCI_GOBIOS) support.
399 : */
400 : #define BIOS_PFN PFN_DOWN(BIOS_BEGIN)
401 : #define BIOS_PFN_END PFN_DOWN(BIOS_END - 1)
402 :
403 : static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
404 : {
405 : if (pcibios_enabled && overlaps(spfn, epfn, BIOS_PFN, BIOS_PFN_END))
406 : return _PAGE_NX;
407 : return 0;
408 : }
409 : #else
410 4437 : static pgprotval_t protect_pci_bios(unsigned long spfn, unsigned long epfn)
411 : {
412 4437 : return 0;
413 : }
414 : #endif
415 :
416 : /*
417 : * The .rodata section needs to be read-only. Using the pfn catches all
418 : * aliases. This also includes __ro_after_init, so do not enforce until
419 : * kernel_set_to_readonly is true.
420 : */
421 4437 : static pgprotval_t protect_rodata(unsigned long spfn, unsigned long epfn)
422 : {
423 4437 : unsigned long epfn_ro, spfn_ro = PFN_DOWN(__pa_symbol(__start_rodata));
424 :
425 : /*
426 : * Note: __end_rodata is at page aligned and not inclusive, so
427 : * subtract 1 to get the last enforced PFN in the rodata area.
428 : */
429 4437 : epfn_ro = PFN_DOWN(__pa_symbol(__end_rodata)) - 1;
430 :
431 4437 : if (kernel_set_to_readonly && overlaps(spfn, epfn, spfn_ro, epfn_ro))
432 9 : return _PAGE_RW;
433 : return 0;
434 : }
435 :
436 : /*
437 : * Protect kernel text against becoming non-executable by forbidding
438 : * _PAGE_NX. This protects only the high kernel mapping (_text -> _etext)
439 : * out of which the kernel actually executes. Do not protect the low
440 : * mapping.
441 : *
442 : * This does not cover __inittext since that is gone after boot.
443 : */
444 4437 : static pgprotval_t protect_kernel_text(unsigned long start, unsigned long end)
445 : {
446 4437 : unsigned long t_end = (unsigned long)_etext - 1;
447 4437 : unsigned long t_start = (unsigned long)_text;
448 :
449 4437 : if (overlaps(start, end, t_start, t_end))
450 23 : return _PAGE_NX;
451 : return 0;
452 : }
453 :
454 : #if defined(CONFIG_X86_64)
455 : /*
456 : * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
457 : * kernel text mappings for the large page aligned text, rodata sections
458 : * will always be read-only. The kernel identity mappings covering the
459 : * holes caused by this alignment can be anything the user asks for.
460 : *
461 : * This will preserve the large page mappings for kernel text/data at no
462 : * extra cost.
463 : */
464 4234 : static pgprotval_t protect_kernel_text_ro(unsigned long start,
465 : unsigned long end)
466 : {
467 4234 : unsigned long t_end = (unsigned long)__end_rodata_hpage_align - 1;
468 4234 : unsigned long t_start = (unsigned long)_text;
469 4234 : unsigned int level;
470 :
471 4234 : if (!kernel_set_to_readonly || !overlaps(start, end, t_start, t_end))
472 : return 0;
473 : /*
474 : * Don't enforce the !RW mapping for the kernel text mapping, if
475 : * the current mapping is already using small page mapping. No
476 : * need to work hard to preserve large page mappings in this case.
477 : *
478 : * This also fixes the Linux Xen paravirt guest boot failure caused
479 : * by unexpected read-only mappings for kernel identity
480 : * mappings. In this paravirt guest case, the kernel text mapping
481 : * and the kernel identity mapping share the same page-table pages,
482 : * so the protections for kernel text and identity mappings have to
483 : * be the same.
484 : */
485 1655 : if (lookup_address(start, &level) && (level != PG_LEVEL_4K))
486 0 : return _PAGE_RW;
487 : return 0;
488 : }
489 : #else
490 : static pgprotval_t protect_kernel_text_ro(unsigned long start,
491 : unsigned long end)
492 : {
493 : return 0;
494 : }
495 : #endif
496 :
497 17395 : static inline bool conflicts(pgprot_t prot, pgprotval_t val)
498 : {
499 17395 : return (pgprot_val(prot) & ~val) != pgprot_val(prot);
500 : }
501 :
502 17545 : static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
503 : unsigned long start, unsigned long end,
504 : unsigned long pfn, const char *txt)
505 : {
506 17545 : static const char *lvltxt[] = {
507 : [CPA_CONFLICT] = "conflict",
508 : [CPA_PROTECT] = "protect",
509 : [CPA_DETECT] = "detect",
510 : };
511 :
512 17545 : if (warnlvl > cpa_warn_level || !conflicts(prot, val))
513 : return;
514 :
515 0 : pr_warn("CPA %8s %10s: 0x%016lx - 0x%016lx PFN %lx req %016llx prevent %016llx\n",
516 : lvltxt[warnlvl], txt, start, end, pfn, (unsigned long long)pgprot_val(prot),
517 : (unsigned long long)val);
518 : }
519 :
520 : /*
521 : * Certain areas of memory on x86 require very specific protection flags,
522 : * for example the BIOS area or kernel text. Callers don't always get this
523 : * right (again, ioremap() on BIOS memory is not uncommon) so this function
524 : * checks and fixes these known static required protection bits.
525 : */
526 4437 : static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
527 : unsigned long pfn, unsigned long npg,
528 : unsigned long lpsize, int warnlvl)
529 : {
530 4437 : pgprotval_t forbidden, res;
531 4437 : unsigned long end;
532 :
533 : /*
534 : * There is no point in checking RW/NX conflicts when the requested
535 : * mapping is setting the page !PRESENT.
536 : */
537 4437 : if (!(pgprot_val(prot) & _PAGE_PRESENT))
538 0 : return prot;
539 :
540 : /* Operate on the virtual address */
541 4437 : end = start + npg * PAGE_SIZE - 1;
542 :
543 4437 : res = protect_kernel_text(start, end);
544 4437 : check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
545 4437 : forbidden = res;
546 :
547 : /*
548 : * Special case to preserve a large page. If the change spans the
549 : * full large page mapping then there is no point in splitting it
550 : * up. This happens with ftrace and is going to be removed once
551 : * ftrace has switched to text_poke().
552 : */
553 4437 : if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
554 4234 : res = protect_kernel_text_ro(start, end);
555 4234 : check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
556 4234 : forbidden |= res;
557 : }
558 :
559 : /* Check the PFN directly */
560 4437 : res = protect_pci_bios(pfn, pfn + npg - 1);
561 4437 : check_conflict(warnlvl, prot, res, start, end, pfn, "PCIBIOS NX");
562 4437 : forbidden |= res;
563 :
564 4437 : res = protect_rodata(pfn, pfn + npg - 1);
565 4437 : check_conflict(warnlvl, prot, res, start, end, pfn, "Rodata RO");
566 4437 : forbidden |= res;
567 :
568 4437 : return __pgprot(pgprot_val(prot) & ~forbidden);
569 : }
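Editor's note: an illustrative fragment (not in the kernel) showing the filtering described above. A request to clear execute permission on a page inside the kernel text range comes back with _PAGE_NX stripped again, because protect_kernel_text() forbids it; CPA_DETECT keeps the check silent.

static void __maybe_unused static_protections_example(void)
{
	/* Ask for a non-executable, writable mapping of the first text page. */
	pgprot_t req = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_NX);
	pgprot_t chk = static_protections(req, (unsigned long)_text,
					  PFN_DOWN(__pa_symbol(_text)), 1,
					  0, CPA_DETECT);

	/* protect_kernel_text() forbids _PAGE_NX here, so it is cleared again. */
	WARN_ON(pgprot_val(chk) & _PAGE_NX);
}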
570 :
571 : /*
572 : * Lookup the page table entry for a virtual address in a specific pgd.
573 : * Return a pointer to the entry and the level of the mapping.
574 : */
575 6226 : pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
576 : unsigned int *level)
577 : {
578 6226 : p4d_t *p4d;
579 6226 : pud_t *pud;
580 6226 : pmd_t *pmd;
581 :
582 6226 : *level = PG_LEVEL_NONE;
583 :
584 6226 : if (pgd_none(*pgd))
585 : return NULL;
586 :
587 6226 : p4d = p4d_offset(pgd, address);
588 6226 : if (p4d_none(*p4d))
589 : return NULL;
590 :
591 6226 : *level = PG_LEVEL_512G;
592 6226 : if (p4d_large(*p4d) || !p4d_present(*p4d))
593 : return (pte_t *)p4d;
594 :
595 6226 : pud = pud_offset(p4d, address);
596 6226 : if (pud_none(*pud))
597 : return NULL;
598 :
599 6226 : *level = PG_LEVEL_1G;
600 12452 : if (pud_large(*pud) || !pud_present(*pud))
601 : return (pte_t *)pud;
602 :
603 6226 : pmd = pmd_offset(pud, address);
604 6226 : if (pmd_none(*pmd))
605 : return NULL;
606 :
607 6226 : *level = PG_LEVEL_2M;
608 12119 : if (pmd_large(*pmd) || !pmd_present(*pmd))
609 : return (pte_t *)pmd;
610 :
611 5893 : *level = PG_LEVEL_4K;
612 :
613 11786 : return pte_offset_kernel(pmd, address);
614 : }
615 :
616 : /*
617 : * Lookup the page table entry for a virtual address. Return a pointer
618 : * to the entry and the level of the mapping.
619 : *
620 : * Note: We return pud and pmd either when the entry is marked large
621 : * or when the present bit is not set. Otherwise we would return a
622 : * pointer to a non-existent mapping.
623 : */
624 6226 : pte_t *lookup_address(unsigned long address, unsigned int *level)
625 : {
626 6226 : return lookup_address_in_pgd(pgd_offset_k(address), address, level);
627 : }
628 : EXPORT_SYMBOL_GPL(lookup_address);
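Editor's note: a small, hypothetical usage sketch of lookup_address(). The returned level tells the caller which page size backs the mapping, and the entry must be checked for presence before its contents are trusted.

static void __maybe_unused report_mapping_level(unsigned long addr)
{
	unsigned int level;
	pte_t *pte = lookup_address(addr, &level);

	if (!pte || !(pte_val(*pte) & _PAGE_PRESENT)) {
		pr_info("%lx is not mapped\n", addr);
		return;
	}
	pr_info("%lx mapped with a %s page\n", addr,
		level == PG_LEVEL_4K ? "4K" :
		level == PG_LEVEL_2M ? "2M" : "1G or larger");
}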
629 :
630 : /*
631 : * Lookup the page table entry for a virtual address in a given mm. Return a
632 : * pointer to the entry and the level of the mapping.
633 : */
634 0 : pte_t *lookup_address_in_mm(struct mm_struct *mm, unsigned long address,
635 : unsigned int *level)
636 : {
637 0 : return lookup_address_in_pgd(pgd_offset(mm, address), address, level);
638 : }
639 : EXPORT_SYMBOL_GPL(lookup_address_in_mm);
640 :
641 4554 : static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
642 : unsigned int *level)
643 : {
644 4554 : if (cpa->pgd)
645 0 : return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
646 : address, level);
647 :
648 4554 : return lookup_address(address, level);
649 : }
650 :
651 : /*
652 : * Lookup the PMD entry for a virtual address. Return a pointer to the entry
653 : * or NULL if not present.
654 : */
655 0 : pmd_t *lookup_pmd_address(unsigned long address)
656 : {
657 0 : pgd_t *pgd;
658 0 : p4d_t *p4d;
659 0 : pud_t *pud;
660 :
661 0 : pgd = pgd_offset_k(address);
662 0 : if (pgd_none(*pgd))
663 : return NULL;
664 :
665 0 : p4d = p4d_offset(pgd, address);
666 0 : if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
667 0 : return NULL;
668 :
669 0 : pud = pud_offset(p4d, address);
670 0 : if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
671 : return NULL;
672 :
673 0 : return pmd_offset(pud, address);
674 : }
675 :
676 : /*
677 : * This is necessary because __pa() does not work on some
678 : * kinds of memory, like vmalloc() or the alloc_remap()
679 : * areas on 32-bit NUMA systems. The percpu areas can
680 : * end up in this kind of memory, for instance.
681 : *
682 : * This could be optimized, but it is only intended to be
683 : * used at initialization time, and keeping it
684 : * unoptimized should increase the testing coverage for
685 : * the more obscure platforms.
686 : */
687 17 : phys_addr_t slow_virt_to_phys(void *__virt_addr)
688 : {
689 17 : unsigned long virt_addr = (unsigned long)__virt_addr;
690 17 : phys_addr_t phys_addr;
691 17 : unsigned long offset;
692 17 : enum pg_level level;
693 17 : pte_t *pte;
694 :
695 17 : pte = lookup_address(virt_addr, &level);
696 17 : BUG_ON(!pte);
697 :
698 : /*
699 : * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
700 : * before being left-shifted PAGE_SHIFT bits -- this trick is to
701 : * make a 32-bit PAE kernel work correctly.
702 : */
703 17 : switch (level) {
704 0 : case PG_LEVEL_1G:
705 0 : phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
706 0 : offset = virt_addr & ~PUD_PAGE_MASK;
707 0 : break;
708 13 : case PG_LEVEL_2M:
709 13 : phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
710 13 : offset = virt_addr & ~PMD_PAGE_MASK;
711 13 : break;
712 4 : default:
713 4 : phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
714 4 : offset = virt_addr & ~PAGE_MASK;
715 : }
716 :
717 17 : return (phys_addr_t)(phys_addr | offset);
718 : }
719 : EXPORT_SYMBOL_GPL(slow_virt_to_phys);
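Editor's note: a usage sketch of slow_virt_to_phys() on a vmalloc()'d buffer, where __pa() must not be used; <linux/vmalloc.h> is already included above, and the printk formats assume the standard kernel %px/%pa semantics.

static void __maybe_unused vmalloc_phys_example(void)
{
	void *buf = vmalloc(PAGE_SIZE);
	phys_addr_t phys;

	if (!buf)
		return;

	phys = slow_virt_to_phys(buf);	/* walks the page tables instead of __pa() */
	pr_info("vmalloc buffer %px -> phys %pa\n", buf, &phys);
	vfree(buf);
}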
720 :
721 : /*
722 : * Set the new pmd in all the pgds we know about:
723 : */
724 64 : static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
725 : {
726 : /* change init_mm */
727 64 : set_pte_atomic(kpte, pte);
728 : #ifdef CONFIG_X86_32
729 : if (!SHARED_KERNEL_PMD) {
730 : struct page *page;
731 :
732 : list_for_each_entry(page, &pgd_list, lru) {
733 : pgd_t *pgd;
734 : p4d_t *p4d;
735 : pud_t *pud;
736 : pmd_t *pmd;
737 :
738 : pgd = (pgd_t *)page_address(page) + pgd_index(address);
739 : p4d = p4d_offset(pgd, address);
740 : pud = pud_offset(p4d, address);
741 : pmd = pmd_offset(pud, address);
742 : set_pte_atomic((pte_t *)pmd, pte);
743 : }
744 : }
745 : #endif
746 : }
747 :
748 4401 : static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
749 : {
750 : /*
751 : * _PAGE_GLOBAL means "global page" for present PTEs.
752 : * But, it is also used to indicate _PAGE_PROTNONE
753 : * for non-present PTEs.
754 : *
755 : * This ensures that a _PAGE_GLOBAL PTE going from
756 : * present to non-present is not confused as
757 : * _PAGE_PROTNONE.
758 : */
759 4401 : if (!(pgprot_val(prot) & _PAGE_PRESENT))
760 0 : pgprot_val(prot) &= ~_PAGE_GLOBAL;
761 :
762 4401 : return prot;
763 : }
764 :
765 153 : static int __should_split_large_page(pte_t *kpte, unsigned long address,
766 : struct cpa_data *cpa)
767 : {
768 153 : unsigned long numpages, pmask, psize, lpaddr, pfn, old_pfn;
769 153 : pgprot_t old_prot, new_prot, req_prot, chk_prot;
770 153 : pte_t new_pte, *tmp;
771 153 : enum pg_level level;
772 :
773 : /*
774 : * Check for races, another CPU might have split this page
775 : * up already:
776 : */
777 153 : tmp = _lookup_address_cpa(cpa, address, &level);
778 153 : if (tmp != kpte)
779 : return 1;
780 :
781 153 : switch (level) {
782 153 : case PG_LEVEL_2M:
783 153 : old_prot = pmd_pgprot(*(pmd_t *)kpte);
784 153 : old_pfn = pmd_pfn(*(pmd_t *)kpte);
785 153 : cpa_inc_2m_checked();
786 : break;
787 0 : case PG_LEVEL_1G:
788 0 : old_prot = pud_pgprot(*(pud_t *)kpte);
789 0 : old_pfn = pud_pfn(*(pud_t *)kpte);
790 0 : cpa_inc_1g_checked();
791 : break;
792 : default:
793 : return -EINVAL;
794 : }
795 :
796 153 : psize = page_level_size(level);
797 153 : pmask = page_level_mask(level);
798 :
799 : /*
800 : * Calculate the number of pages, which fit into this large
801 : * page starting at address:
802 : */
803 153 : lpaddr = (address + psize) & pmask;
804 153 : numpages = (lpaddr - address) >> PAGE_SHIFT;
805 153 : if (numpages < cpa->numpages)
806 36 : cpa->numpages = numpages;
807 :
808 : /*
809 : * We are safe now. Check whether the new pgprot is the same:
810 : * Convert protection attributes to 4k-format, as cpa->mask* are set
811 : * up accordingly.
812 : */
813 :
814 : /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
815 153 : req_prot = pgprot_large_2_4k(old_prot);
816 :
817 153 : pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
818 153 : pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
819 :
820 : /*
821 : * req_prot is in format of 4k pages. It must be converted to large
822 : * page format: the caching mode includes the PAT bit located at
823 : * different bit positions in the two formats.
824 : */
825 153 : req_prot = pgprot_4k_2_large(req_prot);
826 153 : req_prot = pgprot_clear_protnone_bits(req_prot);
827 153 : if (pgprot_val(req_prot) & _PAGE_PRESENT)
828 153 : pgprot_val(req_prot) |= _PAGE_PSE;
829 :
830 : /*
831 : * old_pfn points to the large page base pfn. So we need to add the
832 : * offset of the virtual address:
833 : */
834 153 : pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
835 153 : cpa->pfn = pfn;
836 :
837 : /*
838 : * Calculate the large page base address and the number of 4K pages
839 : * in the large page
840 : */
841 153 : lpaddr = address & pmask;
842 153 : numpages = psize >> PAGE_SHIFT;
843 :
844 : /*
845 : * Sanity check that the existing mapping is correct versus the static
846 : * protections. static_protections() guards against !PRESENT, so no
847 : * extra conditional required here.
848 : */
849 153 : chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
850 : psize, CPA_CONFLICT);
851 :
852 153 : if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
853 : /*
854 : * Split the large page and tell the split code to
855 : * enforce static protections.
856 : */
857 0 : cpa->force_static_prot = 1;
858 0 : return 1;
859 : }
860 :
861 : /*
862 : * Optimization: If the requested pgprot is the same as the current
863 : * pgprot, then the large page can be preserved and no updates are
864 : * required independent of alignment and length of the requested
865 : * range. The above already established that the current pgprot is
866 : * correct, which in consequence makes the requested pgprot correct
867 : * as well if it is the same. The static protection scan below will
868 : * not come to a different conclusion.
869 : */
870 153 : if (pgprot_val(req_prot) == pgprot_val(old_prot)) {
871 153 : cpa_inc_lp_sameprot(level);
872 : return 0;
873 : }
874 :
875 : /*
876 : * If the requested range does not cover the full page, split it up
877 : */
878 64 : if (address != lpaddr || cpa->numpages != numpages)
879 : return 1;
880 :
881 : /*
882 : * Check whether the requested pgprot is conflicting with a static
883 : * protection requirement in the large page.
884 : */
885 50 : new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
886 : psize, CPA_DETECT);
887 :
888 : /*
889 : * If there is a conflict, split the large page.
890 : *
891 : * There used to be a 4k wise evaluation trying really hard to
892 : * preserve the large pages, but experimentation has shown that this
893 : * does not help at all. There might be corner cases which would
894 : * preserve one large page occasionally, but it's really not worth the
895 : * extra code and cycles for the common case.
896 : */
897 50 : if (pgprot_val(req_prot) != pgprot_val(new_prot))
898 : return 1;
899 :
900 : /* All checks passed. Update the large page mapping. */
901 50 : new_pte = pfn_pte(old_pfn, new_prot);
902 50 : __set_pmd_pte(kpte, address, new_pte);
903 50 : cpa->flags |= CPA_FLUSHTLB;
904 50 : cpa_inc_lp_preserved(level);
905 50 : return 0;
906 : }
907 :
908 153 : static int should_split_large_page(pte_t *kpte, unsigned long address,
909 : struct cpa_data *cpa)
910 : {
911 153 : int do_split;
912 :
913 153 : if (cpa->force_split)
914 : return 1;
915 :
916 153 : spin_lock(&pgd_lock);
917 153 : do_split = __should_split_large_page(kpte, address, cpa);
918 153 : spin_unlock(&pgd_lock);
919 :
920 153 : return do_split;
921 : }
922 :
923 7168 : static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
924 : pgprot_t ref_prot, unsigned long address,
925 : unsigned long size)
926 : {
927 7168 : unsigned int npg = PFN_DOWN(size);
928 7168 : pgprot_t prot;
929 :
930 : /*
931 : * If should_split_large_page() discovered an inconsistent mapping,
932 : * remove the invalid protection in the split mapping.
933 : */
934 7168 : if (!cpa->force_static_prot)
935 7168 : goto set;
936 :
937 : /* Hand in lpsize = 0 to enforce the protection mechanism */
938 0 : prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
939 :
940 0 : if (pgprot_val(prot) == pgprot_val(ref_prot))
941 0 : goto set;
942 :
943 : /*
944 : * If this is splitting a PMD, fix it up. PUD splits cannot be
945 : * fixed trivially as that would require rescanning the newly
946 : * installed PMD mappings after returning from split_large_page()
947 : * so an eventual further split can allocate the necessary PTE
948 : * pages. Warn for now and revisit it in case this actually
949 : * happens.
950 : */
951 0 : if (size == PAGE_SIZE)
952 : ref_prot = prot;
953 : else
954 0 : pr_warn_once("CPA: Cannot fixup static protections for PUD split\n");
955 7168 : set:
956 7168 : set_pte(pte, pfn_pte(pfn, ref_prot));
957 7168 : }
958 :
959 : static int
960 14 : __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
961 : struct page *base)
962 : {
963 14 : unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
964 14 : pte_t *pbase = (pte_t *)page_address(base);
965 14 : unsigned int i, level;
966 14 : pgprot_t ref_prot;
967 14 : pte_t *tmp;
968 :
969 14 : spin_lock(&pgd_lock);
970 : /*
971 : * Check for races, another CPU might have split this page
972 : * up for us already:
973 : */
974 14 : tmp = _lookup_address_cpa(cpa, address, &level);
975 14 : if (tmp != kpte) {
976 0 : spin_unlock(&pgd_lock);
977 0 : return 1;
978 : }
979 :
980 14 : paravirt_alloc_pte(&init_mm, page_to_pfn(base));
981 :
982 14 : switch (level) {
983 14 : case PG_LEVEL_2M:
984 14 : ref_prot = pmd_pgprot(*(pmd_t *)kpte);
985 : /*
986 : * Clear PSE (aka _PAGE_PAT) and move
987 : * PAT bit to correct position.
988 : */
989 14 : ref_prot = pgprot_large_2_4k(ref_prot);
990 14 : ref_pfn = pmd_pfn(*(pmd_t *)kpte);
991 14 : lpaddr = address & PMD_MASK;
992 14 : lpinc = PAGE_SIZE;
993 14 : break;
994 :
995 0 : case PG_LEVEL_1G:
996 0 : ref_prot = pud_pgprot(*(pud_t *)kpte);
997 0 : ref_pfn = pud_pfn(*(pud_t *)kpte);
998 0 : pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
999 0 : lpaddr = address & PUD_MASK;
1000 0 : lpinc = PMD_SIZE;
1001 : /*
1002 : * Clear the PSE flag if the PRESENT flag is not set,
1003 : * otherwise pmd_present/pmd_huge will return true
1004 : * even on a non-present pmd.
1005 : */
1006 0 : if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
1007 0 : pgprot_val(ref_prot) &= ~_PAGE_PSE;
1008 : break;
1009 :
1010 : default:
1011 0 : spin_unlock(&pgd_lock);
1012 0 : return 1;
1013 : }
1014 :
1015 14 : ref_prot = pgprot_clear_protnone_bits(ref_prot);
1016 :
1017 : /*
1018 : * Get the target pfn from the original entry:
1019 : */
1020 14 : pfn = ref_pfn;
1021 7182 : for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc, lpaddr += lpinc)
1022 7168 : split_set_pte(cpa, pbase + i, pfn, ref_prot, lpaddr, lpinc);
1023 :
1024 14 : if (virt_addr_valid(address)) {
1025 14 : unsigned long pfn = PFN_DOWN(__pa(address));
1026 :
1027 14 : if (pfn_range_is_mapped(pfn, pfn + 1))
1028 14 : split_page_count(level);
1029 : }
1030 :
1031 : /*
1032 : * Install the new, split up pagetable.
1033 : *
1034 : * We use the standard kernel pagetable protections for the new
1035 : * pagetable protections; the actual ptes set above control the
1036 : * primary protection behavior:
1037 : */
1038 14 : __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
1039 :
1040 : /*
1041 : * Do a global flush tlb after splitting the large page
1042 : * and before we do the actual change page attribute in the PTE.
1043 : *
1044 : * Without this, we violate the TLB application note, that says:
1045 : * "The TLBs may contain both ordinary and large-page
1046 : * translations for a 4-KByte range of linear addresses. This
1047 : * may occur if software modifies the paging structures so that
1048 : * the page size used for the address range changes. If the two
1049 : * translations differ with respect to page frame or attributes
1050 : * (e.g., permissions), processor behavior is undefined and may
1051 : * be implementation-specific."
1052 : *
1053 : * We do this global TLB flush inside the cpa_lock, so that no
1054 : * other CPU with stale TLB entries can, in parallel, change the
1055 : * attributes of a page that falls into the just split large
1056 : * page entry.
1057 : */
1058 14 : flush_tlb_all();
1059 14 : spin_unlock(&pgd_lock);
1060 :
1061 14 : return 0;
1062 : }
1063 :
1064 14 : static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
1065 : unsigned long address)
1066 : {
1067 14 : struct page *base;
1068 :
1069 14 : if (!debug_pagealloc_enabled())
1070 14 : spin_unlock(&cpa_lock);
1071 14 : base = alloc_pages(GFP_KERNEL, 0);
1072 14 : if (!debug_pagealloc_enabled())
1073 14 : spin_lock(&cpa_lock);
1074 14 : if (!base)
1075 : return -ENOMEM;
1076 :
1077 14 : if (__split_large_page(cpa, kpte, address, base))
1078 0 : __free_page(base);
1079 :
1080 : return 0;
1081 : }
1082 :
1083 0 : static bool try_to_free_pte_page(pte_t *pte)
1084 : {
1085 0 : int i;
1086 :
1087 0 : for (i = 0; i < PTRS_PER_PTE; i++)
1088 0 : if (!pte_none(pte[i]))
1089 : return false;
1090 :
1091 0 : free_page((unsigned long)pte);
1092 0 : return true;
1093 : }
1094 :
1095 0 : static bool try_to_free_pmd_page(pmd_t *pmd)
1096 : {
1097 0 : int i;
1098 :
1099 0 : for (i = 0; i < PTRS_PER_PMD; i++)
1100 0 : if (!pmd_none(pmd[i]))
1101 : return false;
1102 :
1103 0 : free_page((unsigned long)pmd);
1104 0 : return true;
1105 : }
1106 :
1107 0 : static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
1108 : {
1109 0 : pte_t *pte = pte_offset_kernel(pmd, start);
1110 :
1111 0 : while (start < end) {
1112 0 : set_pte(pte, __pte(0));
1113 :
1114 0 : start += PAGE_SIZE;
1115 0 : pte++;
1116 : }
1117 :
1118 0 : if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
1119 0 : pmd_clear(pmd);
1120 0 : return true;
1121 : }
1122 : return false;
1123 : }
1124 :
1125 0 : static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
1126 : unsigned long start, unsigned long end)
1127 : {
1128 0 : if (unmap_pte_range(pmd, start, end))
1129 0 : if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
1130 0 : pud_clear(pud);
1131 0 : }
1132 :
1133 0 : static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
1134 : {
1135 0 : pmd_t *pmd = pmd_offset(pud, start);
1136 :
1137 : /*
1138 : * Not on a 2MB page boundary?
1139 : */
1140 0 : if (start & (PMD_SIZE - 1)) {
1141 0 : unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
1142 0 : unsigned long pre_end = min_t(unsigned long, end, next_page);
1143 :
1144 0 : __unmap_pmd_range(pud, pmd, start, pre_end);
1145 :
1146 0 : start = pre_end;
1147 0 : pmd++;
1148 : }
1149 :
1150 : /*
1151 : * Try to unmap in 2M chunks.
1152 : */
1153 0 : while (end - start >= PMD_SIZE) {
1154 0 : if (pmd_large(*pmd))
1155 0 : pmd_clear(pmd);
1156 : else
1157 0 : __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
1158 :
1159 0 : start += PMD_SIZE;
1160 0 : pmd++;
1161 : }
1162 :
1163 : /*
1164 : * 4K leftovers?
1165 : */
1166 0 : if (start < end)
1167 0 : return __unmap_pmd_range(pud, pmd, start, end);
1168 :
1169 : /*
1170 : * Try again to free the PMD page if we haven't succeeded above.
1171 : */
1172 0 : if (!pud_none(*pud))
1173 0 : if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
1174 0 : pud_clear(pud);
1175 : }
1176 :
1177 0 : static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
1178 : {
1179 0 : pud_t *pud = pud_offset(p4d, start);
1180 :
1181 : /*
1182 : * Not on a GB page boundary?
1183 : */
1184 0 : if (start & (PUD_SIZE - 1)) {
1185 0 : unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
1186 0 : unsigned long pre_end = min_t(unsigned long, end, next_page);
1187 :
1188 0 : unmap_pmd_range(pud, start, pre_end);
1189 :
1190 0 : start = pre_end;
1191 0 : pud++;
1192 : }
1193 :
1194 : /*
1195 : * Try to unmap in 1G chunks.
1196 : */
1197 0 : while (end - start >= PUD_SIZE) {
1198 :
1199 0 : if (pud_large(*pud))
1200 0 : pud_clear(pud);
1201 : else
1202 0 : unmap_pmd_range(pud, start, start + PUD_SIZE);
1203 :
1204 0 : start += PUD_SIZE;
1205 0 : pud++;
1206 : }
1207 :
1208 : /*
1209 : * 2M leftovers?
1210 : */
1211 0 : if (start < end)
1212 0 : unmap_pmd_range(pud, start, end);
1213 :
1214 : /*
1215 : * No need to try to free the PUD page because we'll free it in
1216 : * populate_pgd's error path
1217 : */
1218 0 : }
1219 :
1220 0 : static int alloc_pte_page(pmd_t *pmd)
1221 : {
1222 0 : pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
1223 0 : if (!pte)
1224 : return -1;
1225 :
1226 0 : set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
1227 0 : return 0;
1228 : }
1229 :
1230 0 : static int alloc_pmd_page(pud_t *pud)
1231 : {
1232 0 : pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
1233 0 : if (!pmd)
1234 : return -1;
1235 :
1236 0 : set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
1237 0 : return 0;
1238 : }
1239 :
1240 0 : static void populate_pte(struct cpa_data *cpa,
1241 : unsigned long start, unsigned long end,
1242 : unsigned num_pages, pmd_t *pmd, pgprot_t pgprot)
1243 : {
1244 0 : pte_t *pte;
1245 :
1246 0 : pte = pte_offset_kernel(pmd, start);
1247 :
1248 0 : pgprot = pgprot_clear_protnone_bits(pgprot);
1249 :
1250 0 : while (num_pages-- && start < end) {
1251 0 : set_pte(pte, pfn_pte(cpa->pfn, pgprot));
1252 :
1253 0 : start += PAGE_SIZE;
1254 0 : cpa->pfn++;
1255 0 : pte++;
1256 : }
1257 0 : }
1258 :
1259 0 : static long populate_pmd(struct cpa_data *cpa,
1260 : unsigned long start, unsigned long end,
1261 : unsigned num_pages, pud_t *pud, pgprot_t pgprot)
1262 : {
1263 0 : long cur_pages = 0;
1264 0 : pmd_t *pmd;
1265 0 : pgprot_t pmd_pgprot;
1266 :
1267 : /*
1268 : * Not on a 2M boundary?
1269 : */
1270 0 : if (start & (PMD_SIZE - 1)) {
1271 0 : unsigned long pre_end = start + (num_pages << PAGE_SHIFT);
1272 0 : unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
1273 :
1274 0 : pre_end = min_t(unsigned long, pre_end, next_page);
1275 0 : cur_pages = (pre_end - start) >> PAGE_SHIFT;
1276 0 : cur_pages = min_t(unsigned int, num_pages, cur_pages);
1277 :
1278 : /*
1279 : * Need a PTE page?
1280 : */
1281 0 : pmd = pmd_offset(pud, start);
1282 0 : if (pmd_none(*pmd))
1283 0 : if (alloc_pte_page(pmd))
1284 : return -1;
1285 :
1286 0 : populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
1287 :
1288 0 : start = pre_end;
1289 : }
1290 :
1291 : /*
1292 : * We mapped them all?
1293 : */
1294 0 : if (num_pages == cur_pages)
1295 : return cur_pages;
1296 :
1297 0 : pmd_pgprot = pgprot_4k_2_large(pgprot);
1298 :
1299 0 : while (end - start >= PMD_SIZE) {
1300 :
1301 : /*
1302 : * We cannot use a 1G page so allocate a PMD page if needed.
1303 : */
1304 0 : if (pud_none(*pud))
1305 0 : if (alloc_pmd_page(pud))
1306 0 : return -1;
1307 :
1308 0 : pmd = pmd_offset(pud, start);
1309 :
1310 0 : set_pmd(pmd, pmd_mkhuge(pfn_pmd(cpa->pfn,
1311 : canon_pgprot(pmd_pgprot))));
1312 :
1313 0 : start += PMD_SIZE;
1314 0 : cpa->pfn += PMD_SIZE >> PAGE_SHIFT;
1315 0 : cur_pages += PMD_SIZE >> PAGE_SHIFT;
1316 : }
1317 :
1318 : /*
1319 : * Map trailing 4K pages.
1320 : */
1321 0 : if (start < end) {
1322 0 : pmd = pmd_offset(pud, start);
1323 0 : if (pmd_none(*pmd))
1324 0 : if (alloc_pte_page(pmd))
1325 : return -1;
1326 :
1327 0 : populate_pte(cpa, start, end, num_pages - cur_pages,
1328 : pmd, pgprot);
1329 : }
1330 : return num_pages;
1331 : }
1332 :
1333 0 : static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
1334 : pgprot_t pgprot)
1335 : {
1336 0 : pud_t *pud;
1337 0 : unsigned long end;
1338 0 : long cur_pages = 0;
1339 0 : pgprot_t pud_pgprot;
1340 :
1341 0 : end = start + (cpa->numpages << PAGE_SHIFT);
1342 :
1343 : /*
1344 : * Not on a Gb page boundary? => map everything up to it with
1345 : * smaller pages.
1346 : */
1347 0 : if (start & (PUD_SIZE - 1)) {
1348 0 : unsigned long pre_end;
1349 0 : unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
1350 :
1351 0 : pre_end = min_t(unsigned long, end, next_page);
1352 0 : cur_pages = (pre_end - start) >> PAGE_SHIFT;
1353 0 : cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
1354 :
1355 0 : pud = pud_offset(p4d, start);
1356 :
1357 : /*
1358 : * Need a PMD page?
1359 : */
1360 0 : if (pud_none(*pud))
1361 0 : if (alloc_pmd_page(pud))
1362 : return -1;
1363 :
1364 0 : cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
1365 : pud, pgprot);
1366 0 : if (cur_pages < 0)
1367 0 : return cur_pages;
1368 :
1369 : start = pre_end;
1370 : }
1371 :
1372 : /* We mapped them all? */
1373 0 : if (cpa->numpages == cur_pages)
1374 0 : return cur_pages;
1375 :
1376 0 : pud = pud_offset(p4d, start);
1377 0 : pud_pgprot = pgprot_4k_2_large(pgprot);
1378 :
1379 : /*
1380 : * Map everything starting from the Gb boundary, possibly with 1G pages
1381 : */
1382 0 : while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
1383 0 : set_pud(pud, pud_mkhuge(pfn_pud(cpa->pfn,
1384 : canon_pgprot(pud_pgprot))));
1385 :
1386 0 : start += PUD_SIZE;
1387 0 : cpa->pfn += PUD_SIZE >> PAGE_SHIFT;
1388 0 : cur_pages += PUD_SIZE >> PAGE_SHIFT;
1389 0 : pud++;
1390 : }
1391 :
1392 : /* Map trailing leftover */
1393 0 : if (start < end) {
1394 0 : long tmp;
1395 :
1396 0 : pud = pud_offset(p4d, start);
1397 0 : if (pud_none(*pud))
1398 0 : if (alloc_pmd_page(pud))
1399 : return -1;
1400 :
1401 0 : tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
1402 : pud, pgprot);
1403 0 : if (tmp < 0)
1404 0 : return cur_pages;
1405 :
1406 0 : cur_pages += tmp;
1407 : }
1408 0 : return cur_pages;
1409 : }
1410 :
1411 : /*
1412 : * Restrictions for kernel page table do not necessarily apply when mapping in
1413 : * an alternate PGD.
1414 : */
1415 0 : static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
1416 : {
1417 0 : pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
1418 0 : pud_t *pud = NULL; /* shut up gcc */
1419 0 : p4d_t *p4d;
1420 0 : pgd_t *pgd_entry;
1421 0 : long ret;
1422 :
1423 0 : pgd_entry = cpa->pgd + pgd_index(addr);
1424 :
1425 0 : if (pgd_none(*pgd_entry)) {
1426 : p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
1427 : if (!p4d)
1428 : return -1;
1429 :
1430 : set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
1431 : }
1432 :
1433 : /*
1434 : * Allocate a PUD page and hand it down for mapping.
1435 : */
1436 0 : p4d = p4d_offset(pgd_entry, addr);
1437 0 : if (p4d_none(*p4d)) {
1438 0 : pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
1439 0 : if (!pud)
1440 : return -1;
1441 :
1442 0 : set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
1443 : }
1444 :
1445 0 : pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
1446 0 : pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
1447 :
1448 0 : ret = populate_pud(cpa, addr, p4d, pgprot);
1449 0 : if (ret < 0) {
1450 : /*
1451 : * Leave the PUD page in place in case some other CPU or thread
1452 : * already found it, but remove any useless entries we just
1453 : * added to it.
1454 : */
1455 0 : unmap_pud_range(p4d, addr,
1456 0 : addr + (cpa->numpages << PAGE_SHIFT));
1457 0 : return ret;
1458 : }
1459 :
1460 0 : cpa->numpages = ret;
1461 0 : return 0;
1462 : }
1463 :
1464 0 : static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
1465 : int primary)
1466 : {
1467 0 : if (cpa->pgd) {
1468 : /*
1469 : * Right now, we only execute this code path when mapping
1470 : * the EFI virtual memory map regions; no other users
1471 : * provide a ->pgd value. This may change in the future.
1472 : */
1473 0 : return populate_pgd(cpa, vaddr);
1474 : }
1475 :
1476 : /*
1477 : * Ignore all non primary paths.
1478 : */
1479 0 : if (!primary) {
1480 0 : cpa->numpages = 1;
1481 0 : return 0;
1482 : }
1483 :
1484 : /*
1485 : * Ignore the NULL PTE for kernel identity mapping, as it is expected
1486 : * to have holes.
1487 : * Also set numpages to '1' indicating that we processed cpa req for
1488 : * one virtual address page and its pfn. TBD: numpages can be set based
1489 : * on the initial value and the level returned by lookup_address().
1490 : */
1491 0 : if (within(vaddr, PAGE_OFFSET,
1492 0 : PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
1493 0 : cpa->numpages = 1;
1494 0 : cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
1495 0 : return 0;
1496 :
1497 0 : } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
1498 : /* Faults in the highmap are OK, so do not warn: */
1499 : return -EFAULT;
1500 : } else {
1501 0 : WARN(1, KERN_WARNING "CPA: called for zero pte. "
1502 : "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
1503 : *cpa->vaddr);
1504 :
1505 0 : return -EFAULT;
1506 : }
1507 : }
1508 :
1509 4373 : static int __change_page_attr(struct cpa_data *cpa, int primary)
1510 : {
1511 4373 : unsigned long address;
1512 4373 : int do_split, err;
1513 4373 : unsigned int level;
1514 4373 : pte_t *kpte, old_pte;
1515 :
1516 4373 : address = __cpa_addr(cpa, cpa->curpage);
1517 4387 : repeat:
1518 4387 : kpte = _lookup_address_cpa(cpa, address, &level);
1519 4387 : if (!kpte)
1520 0 : return __cpa_process_fault(cpa, address, primary);
1521 :
1522 4387 : old_pte = *kpte;
1523 4387 : if (pte_none(old_pte))
1524 0 : return __cpa_process_fault(cpa, address, primary);
1525 :
1526 4387 : if (level == PG_LEVEL_4K) {
1527 4234 : pte_t new_pte;
1528 4234 : pgprot_t new_prot = pte_pgprot(old_pte);
1529 4234 : unsigned long pfn = pte_pfn(old_pte);
1530 :
1531 4234 : pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
1532 4234 : pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
1533 :
1534 4234 : cpa_inc_4k_install();
1535 : /* Hand in lpsize = 0 to enforce the protection mechanism */
1536 4234 : new_prot = static_protections(new_prot, address, pfn, 1, 0,
1537 : CPA_PROTECT);
1538 :
1539 4234 : new_prot = pgprot_clear_protnone_bits(new_prot);
1540 :
1541 : /*
1542 : * We need to keep the pfn from the existing PTE,
1543 : * after all we're only going to change its attributes,
1544 : * not the memory it points to.
1545 : */
1546 4234 : new_pte = pfn_pte(pfn, new_prot);
1547 4234 : cpa->pfn = pfn;
1548 : /*
1549 : * Do we really change anything ?
1550 : */
1551 4234 : if (pte_val(old_pte) != pte_val(new_pte)) {
1552 2826 : set_pte_atomic(kpte, new_pte);
1553 2826 : cpa->flags |= CPA_FLUSHTLB;
1554 : }
1555 4234 : cpa->numpages = 1;
1556 4234 : return 0;
1557 : }
1558 :
1559 : /*
1560 : * Check, whether we can keep the large page intact
1561 : * and just change the pte:
1562 : */
1563 153 : do_split = should_split_large_page(kpte, address, cpa);
1564 : /*
1565 : * When the range fits into the existing large page,
1566 : * return. cpa->numpages and the CPA_FLUSHTLB flag have been
1567 : * updated in should_split_large_page():
1568 : */
1569 153 : if (do_split <= 0)
1570 139 : return do_split;
1571 :
1572 : /*
1573 : * We have to split the large page:
1574 : */
1575 14 : err = split_large_page(cpa, kpte, address);
1576 14 : if (!err)
1577 14 : goto repeat;
1578 :
1579 : return err;
1580 : }
1581 :
1582 : static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
1583 :
1584 988 : static int cpa_process_alias(struct cpa_data *cpa)
1585 : {
1586 988 : struct cpa_data alias_cpa;
1587 988 : unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
1588 988 : unsigned long vaddr;
1589 988 : int ret;
1590 :
1591 988 : if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
1592 : return 0;
1593 :
1594 : /*
1595 : * No need to redo, when the primary call touched the direct
1596 : * mapping already:
1597 : */
1598 988 : vaddr = __cpa_addr(cpa, cpa->curpage);
1599 988 : if (!(within(vaddr, PAGE_OFFSET,
1600 988 : PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
1601 :
1602 986 : alias_cpa = *cpa;
1603 986 : alias_cpa.vaddr = &laddr;
1604 986 : alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
1605 986 : alias_cpa.curpage = 0;
1606 :
1607 986 : cpa->force_flush_all = 1;
1608 :
1609 986 : ret = __change_page_attr_set_clr(&alias_cpa, 0);
1610 986 : if (ret)
1611 : return ret;
1612 : }
1613 :
1614 : #ifdef CONFIG_X86_64
1615 : /*
1616 : * If the primary call didn't touch the high mapping already
1617 : * and the physical address is inside the kernel map, we need
1618 : * to touch the high mapped kernel as well:
1619 : */
1620 988 : if (!within(vaddr, (unsigned long)_text, _brk_end) &&
1621 9 : __cpa_pfn_in_highmap(cpa->pfn)) {
1622 0 : unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
1623 0 : __START_KERNEL_map - phys_base;
1624 0 : alias_cpa = *cpa;
1625 0 : alias_cpa.vaddr = &temp_cpa_vaddr;
1626 0 : alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
1627 0 : alias_cpa.curpage = 0;
1628 :
1629 0 : cpa->force_flush_all = 1;
1630 : /*
1631 : * The high mapping range is imprecise, so ignore the
1632 : * return value.
1633 : */
1634 0 : __change_page_attr_set_clr(&alias_cpa, 0);
1635 : }
1636 : #endif
1637 :
1638 : return 0;
1639 : }
1640 :
1641 1007 : static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
1642 : {
1643 1007 : unsigned long numpages = cpa->numpages;
1644 1007 : unsigned long rempages = numpages;
1645 1007 : int ret = 0;
1646 :
1647 5380 : while (rempages) {
1648 : /*
1649 : * Store the remaining nr of pages for the large page
1650 : * preservation check.
1651 : */
1652 4373 : cpa->numpages = rempages;
1653 : /* for array changes, we can't use large page */
1654 4373 : if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
1655 0 : cpa->numpages = 1;
1656 :
1657 4373 : if (!debug_pagealloc_enabled())
1658 4373 : spin_lock(&cpa_lock);
1659 4373 : ret = __change_page_attr(cpa, checkalias);
1660 4373 : if (!debug_pagealloc_enabled())
1661 4373 : spin_unlock(&cpa_lock);
1662 4373 : if (ret)
1663 0 : goto out;
1664 :
1665 4373 : if (checkalias) {
1666 988 : ret = cpa_process_alias(cpa);
1667 988 : if (ret)
1668 0 : goto out;
1669 : }
1670 :
1671 : /*
1672 : * Adjust the number of pages with the result of the
1673 : * CPA operation. Either a large page has been
1674 : * preserved or a single page update happened.
1675 : */
1676 4373 : BUG_ON(cpa->numpages > rempages || !cpa->numpages);
1677 4373 : rempages -= cpa->numpages;
1678 4373 : cpa->curpage += cpa->numpages;
1679 : }
1680 :
1681 1007 : out:
1682 : /* Restore the original numpages */
1683 1007 : cpa->numpages = numpages;
1684 1007 : return ret;
1685 : }
1686 :
1687 21 : static int change_page_attr_set_clr(unsigned long *addr, int numpages,
1688 : pgprot_t mask_set, pgprot_t mask_clr,
1689 : int force_split, int in_flag,
1690 : struct page **pages)
1691 : {
1692 21 : struct cpa_data cpa;
1693 21 : int ret, cache, checkalias;
1694 :
1695 21 : memset(&cpa, 0, sizeof(cpa));
1696 :
1697 : /*
1698 : * Check whether we are asked to set an unsupported
1699 : * feature. Clearing unsupported features is OK.
1700 : */
1701 21 : mask_set = canon_pgprot(mask_set);
1702 :
1703 21 : if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
1704 : return 0;
1705 :
1706 : /* Ensure we are PAGE_SIZE aligned */
1707 21 : if (in_flag & CPA_ARRAY) {
1708 : int i;
1709 0 : for (i = 0; i < numpages; i++) {
1710 0 : if (addr[i] & ~PAGE_MASK) {
1711 0 : addr[i] &= PAGE_MASK;
1712 0 : WARN_ON_ONCE(1);
1713 : }
1714 : }
1715 21 : } else if (!(in_flag & CPA_PAGES_ARRAY)) {
1716 : /*
1717 : * in_flag of CPA_PAGES_ARRAY implies it is aligned.
1718 : * No need to check in that case
1719 : */
1720 21 : if (*addr & ~PAGE_MASK) {
1721 0 : *addr &= PAGE_MASK;
1722 : /*
1723 : * People should not be passing in unaligned addresses:
1724 : */
1725 0 : WARN_ON_ONCE(1);
1726 : }
1727 : }
1728 :
1729 : /* Must avoid aliasing mappings in the highmem code */
1730 21 : kmap_flush_unused();
1731 :
1732 21 : vm_unmap_aliases();
1733 :
1734 21 : cpa.vaddr = addr;
1735 21 : cpa.pages = pages;
1736 21 : cpa.numpages = numpages;
1737 21 : cpa.mask_set = mask_set;
1738 21 : cpa.mask_clr = mask_clr;
1739 21 : cpa.flags = 0;
1740 21 : cpa.curpage = 0;
1741 21 : cpa.force_split = force_split;
1742 :
1743 21 : if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
1744 0 : cpa.flags |= in_flag;
1745 :
1746 : /* No alias checking for _NX bit modifications */
1747 21 : checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
1748 : /* Has caller explicitly disabled alias checking? */
1749 21 : if (in_flag & CPA_NO_CHECK_ALIAS)
1750 0 : checkalias = 0;
1751 :
1752 21 : ret = __change_page_attr_set_clr(&cpa, checkalias);
1753 :
1754 : /*
1755 : * Check whether we really changed something:
1756 : */
1757 21 : if (!(cpa.flags & CPA_FLUSHTLB))
1758 5 : goto out;
1759 :
1760 : /*
1761 : * No need to flush, when we did not set any of the caching
1762 : * attributes:
1763 : */
1764 16 : cache = !!pgprot2cachemode(mask_set);
1765 :
1766 : /*
1767 : * On error; flush everything to be sure.
1768 : */
1769 16 : if (ret) {
1770 0 : cpa_flush_all(cache);
1771 0 : goto out;
1772 : }
1773 :
1774 16 : cpa_flush(&cpa, cache);
1775 : out:
1776 : return ret;
1777 : }
1778 :
1779 10 : static inline int change_page_attr_set(unsigned long *addr, int numpages,
1780 : pgprot_t mask, int array)
1781 : {
1782 20 : return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
1783 : (array ? CPA_ARRAY : 0), NULL);
1784 : }
1785 :
1786 11 : static inline int change_page_attr_clear(unsigned long *addr, int numpages,
1787 : pgprot_t mask, int array)
1788 : {
1789 22 : return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
1790 : (array ? CPA_ARRAY : 0), NULL);
1791 : }
1792 :
1793 0 : static inline int cpa_set_pages_array(struct page **pages, int numpages,
1794 : pgprot_t mask)
1795 : {
1796 0 : return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
1797 : CPA_PAGES_ARRAY, pages);
1798 : }
1799 :
1800 0 : static inline int cpa_clear_pages_array(struct page **pages, int numpages,
1801 : pgprot_t mask)
1802 : {
1803 0 : return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
1804 : CPA_PAGES_ARRAY, pages);
1805 : }
1806 :
1807 : /*
1808 : * _set_memory_prot is an internal helper for callers that have been passed
1809 : * a pgprot_t value from upper layers and a reservation has already been taken.
1810 : * If you want to set the pgprot to a specific page protection, use the
1811 : * set_memory_xx() functions.
1812 : */
1813 0 : int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot)
1814 : {
1815 0 : return change_page_attr_set_clr(&addr, numpages, prot,
1816 0 : __pgprot(~pgprot_val(prot)), 0, 0,
1817 : NULL);
1818 : }
1819 :
1820 0 : int _set_memory_uc(unsigned long addr, int numpages)
1821 : {
1822 : /*
1823 : * for now UC MINUS. see comments in ioremap()
1824 : * If you really need strong UC use ioremap_uc(), but note
1825 : * that you cannot override IO areas with set_memory_*() as
1826 : * these helpers cannot work with IO memory.
1827 : */
1828 0 : return change_page_attr_set(&addr, numpages,
1829 : cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
1830 : 0);
1831 : }
1832 :
1833 0 : int set_memory_uc(unsigned long addr, int numpages)
1834 : {
1835 0 : int ret;
1836 :
1837 : /*
1838 : * For now this uses UC MINUS; see the comments in ioremap().
1839 : */
1840 0 : ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
1841 : _PAGE_CACHE_MODE_UC_MINUS, NULL);
1842 0 : if (ret)
1843 0 : goto out_err;
1844 :
1845 0 : ret = _set_memory_uc(addr, numpages);
1846 0 : if (ret)
1847 0 : goto out_free;
1848 :
1849 : return 0;
1850 :
1851 0 : out_free:
1852 0 : memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1853 : out_err:
1854 : return ret;
1855 : }
1856 : EXPORT_SYMBOL(set_memory_uc);
1857 :
1858 0 : int _set_memory_wc(unsigned long addr, int numpages)
1859 : {
1860 0 : int ret;
1861 :
1862 0 : ret = change_page_attr_set(&addr, numpages,
1863 : cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS),
1864 : 0);
1865 0 : if (!ret) {
1866 0 : ret = change_page_attr_set_clr(&addr, numpages,
1867 : cachemode2pgprot(_PAGE_CACHE_MODE_WC),
1868 0 : __pgprot(_PAGE_CACHE_MASK),
1869 : 0, 0, NULL);
1870 : }
1871 0 : return ret;
1872 : }
1873 :
1874 0 : int set_memory_wc(unsigned long addr, int numpages)
1875 : {
1876 0 : int ret;
1877 :
1878 0 : ret = memtype_reserve(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
1879 : _PAGE_CACHE_MODE_WC, NULL);
1880 0 : if (ret)
1881 : return ret;
1882 :
1883 0 : ret = _set_memory_wc(addr, numpages);
1884 0 : if (ret)
1885 0 : memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1886 :
1887 : return ret;
1888 : }
1889 : EXPORT_SYMBOL(set_memory_wc);
1890 :
1891 0 : int _set_memory_wt(unsigned long addr, int numpages)
1892 : {
1893 0 : return change_page_attr_set(&addr, numpages,
1894 : cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
1895 : }
1896 :
1897 0 : int _set_memory_wb(unsigned long addr, int numpages)
1898 : {
1899 : /* WB cache mode is hard wired to all cache attribute bits being 0 */
1900 0 : return change_page_attr_clear(&addr, numpages,
1901 0 : __pgprot(_PAGE_CACHE_MASK), 0);
1902 : }
1903 :
1904 0 : int set_memory_wb(unsigned long addr, int numpages)
1905 : {
1906 0 : int ret;
1907 :
1908 0 : ret = _set_memory_wb(addr, numpages);
1909 0 : if (ret)
1910 : return ret;
1911 :
1912 0 : memtype_free(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
1913 0 : return 0;
1914 : }
1915 : EXPORT_SYMBOL(set_memory_wb);
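/*
 * Editorial example (not part of set_memory.c): a minimal sketch of the
 * usual lifecycle for the caching helpers above on direct-mapped RAM.
 * set_memory_wc() (and set_memory_uc()) reserve the memtype and change the
 * attribute; set_memory_wb() restores write-back and releases the
 * reservation.  MMIO should be mapped with ioremap_wc()/ioremap_uc()
 * instead, as the comments above note.  The function name and arguments
 * are hypothetical.
 */
static int example_wc_window(unsigned long addr, int numpages)
{
        int ret;

        ret = set_memory_wc(addr, numpages);    /* memtype_reserve() + UC- then WC */
        if (ret)
                return ret;

        /* ... the CPU streams data through the write-combining window ... */

        return set_memory_wb(addr, numpages);   /* back to WB, memtype freed */
}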
1916 :
1917 1 : int set_memory_x(unsigned long addr, int numpages)
1918 : {
1919 1 : if (!(__supported_pte_mask & _PAGE_NX))
1920 : return 0;
1921 :
1922 1 : return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
1923 : }
1924 :
1925 6 : int set_memory_nx(unsigned long addr, int numpages)
1926 : {
1927 6 : if (!(__supported_pte_mask & _PAGE_NX))
1928 : return 0;
1929 :
1930 6 : return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
1931 : }
1932 :
1933 10 : int set_memory_ro(unsigned long addr, int numpages)
1934 : {
1935 10 : return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
1936 : }
1937 :
1938 4 : int set_memory_rw(unsigned long addr, int numpages)
1939 : {
1940 4 : return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
1941 : }
1942 :
1943 0 : int set_memory_np(unsigned long addr, int numpages)
1944 : {
1945 0 : return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
1946 : }
1947 :
1948 0 : int set_memory_np_noalias(unsigned long addr, int numpages)
1949 : {
1950 0 : int cpa_flags = CPA_NO_CHECK_ALIAS;
1951 :
1952 0 : return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1953 0 : __pgprot(_PAGE_PRESENT), 0,
1954 : cpa_flags, NULL);
1955 : }
1956 :
1957 0 : int set_memory_4k(unsigned long addr, int numpages)
1958 : {
1959 0 : return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1960 0 : __pgprot(0), 1, 0, NULL);
1961 : }
1962 :
1963 0 : int set_memory_nonglobal(unsigned long addr, int numpages)
1964 : {
1965 0 : return change_page_attr_clear(&addr, numpages,
1966 0 : __pgprot(_PAGE_GLOBAL), 0);
1967 : }
1968 :
1969 0 : int set_memory_global(unsigned long addr, int numpages)
1970 : {
1971 0 : return change_page_attr_set(&addr, numpages,
1972 0 : __pgprot(_PAGE_GLOBAL), 0);
1973 : }
1974 :
1975 0 : static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
1976 : {
1977 0 : struct cpa_data cpa;
1978 0 : int ret;
1979 :
1980 : /* Nothing to do if memory encryption is not active */
1981 0 : if (!mem_encrypt_active())
1982 0 : return 0;
1983 :
1984 : /* Should not be working on unaligned addresses */
1985 : if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
1986 : addr &= PAGE_MASK;
1987 :
1988 : memset(&cpa, 0, sizeof(cpa));
1989 : cpa.vaddr = &addr;
1990 : cpa.numpages = numpages;
1991 : cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0);
1992 : cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC);
1993 : cpa.pgd = init_mm.pgd;
1994 :
1995 : /* Must avoid aliasing mappings in the highmem code */
1996 : kmap_flush_unused();
1997 : vm_unmap_aliases();
1998 :
1999 : /*
2000 : * Before changing the encryption attribute, we need to flush caches.
2001 : */
2002 : cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
2003 :
2004 : ret = __change_page_attr_set_clr(&cpa, 1);
2005 :
2006 : /*
2007 : * After changing the encryption attribute, we need to flush TLBs again
2008 : * in case any speculative TLB caching occurred (but no need to flush
2009 : * caches again). We could just use cpa_flush_all(), but since TLB
2010 : * flushing may get optimized in the cpa_flush() path, use the same
2011 : * logic as above.
2012 : */
2013 : cpa_flush(&cpa, 0);
2014 :
2015 : return ret;
2016 : }
2017 :
2018 0 : int set_memory_encrypted(unsigned long addr, int numpages)
2019 : {
2020 0 : return __set_memory_enc_dec(addr, numpages, true);
2021 : }
2022 : EXPORT_SYMBOL_GPL(set_memory_encrypted);
2023 :
2024 0 : int set_memory_decrypted(unsigned long addr, int numpages)
2025 : {
2026 0 : return __set_memory_enc_dec(addr, numpages, false);
2027 : }
2028 : EXPORT_SYMBOL_GPL(set_memory_decrypted);
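/*
 * Editorial example (not part of set_memory.c): a minimal sketch of how an
 * SEV/SME guest shares a direct-mapped buffer with the hypervisor.  The
 * encryption attribute is cleared before the buffer is used and restored
 * before the memory is freed; the helpers above perform the required cache
 * and TLB flushing internally.  The function name and arguments are
 * hypothetical.
 */
static int example_share_with_host(unsigned long addr, int numpages)
{
        int ret;

        ret = set_memory_decrypted(addr, numpages);     /* clear _PAGE_ENC */
        if (ret)
                return ret;

        /* ... the buffer is now accessible to the hypervisor ... */

        return set_memory_encrypted(addr, numpages);    /* set _PAGE_ENC again */
}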
2029 :
2030 0 : int set_pages_uc(struct page *page, int numpages)
2031 : {
2032 0 : unsigned long addr = (unsigned long)page_address(page);
2033 :
2034 0 : return set_memory_uc(addr, numpages);
2035 : }
2036 : EXPORT_SYMBOL(set_pages_uc);
2037 :
2038 0 : static int _set_pages_array(struct page **pages, int numpages,
2039 : enum page_cache_mode new_type)
2040 : {
2041 0 : unsigned long start;
2042 0 : unsigned long end;
2043 0 : enum page_cache_mode set_type;
2044 0 : int i;
2045 0 : int free_idx;
2046 0 : int ret;
2047 :
2048 0 : for (i = 0; i < numpages; i++) {
2049 0 : if (PageHighMem(pages[i]))
2050 : continue;
2051 0 : start = page_to_pfn(pages[i]) << PAGE_SHIFT;
2052 0 : end = start + PAGE_SIZE;
2053 0 : if (memtype_reserve(start, end, new_type, NULL))
2054 0 : goto err_out;
2055 : }
2056 :
2057 : /* If WC, set to UC- first and then WC */
2058 0 : set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
2059 0 : _PAGE_CACHE_MODE_UC_MINUS : new_type;
2060 :
2061 0 : ret = cpa_set_pages_array(pages, numpages,
2062 : cachemode2pgprot(set_type));
2063 0 : if (!ret && new_type == _PAGE_CACHE_MODE_WC)
2064 0 : ret = change_page_attr_set_clr(NULL, numpages,
2065 : cachemode2pgprot(
2066 : _PAGE_CACHE_MODE_WC),
2067 0 : __pgprot(_PAGE_CACHE_MASK),
2068 : 0, CPA_PAGES_ARRAY, pages);
2069 0 : if (ret)
2070 0 : goto err_out;
2071 : return 0; /* Success */
2072 0 : err_out:
2073 0 : free_idx = i;
2074 0 : for (i = 0; i < free_idx; i++) {
2075 0 : if (PageHighMem(pages[i]))
2076 : continue;
2077 0 : start = page_to_pfn(pages[i]) << PAGE_SHIFT;
2078 0 : end = start + PAGE_SIZE;
2079 0 : memtype_free(start, end);
2080 : }
2081 : return -EINVAL;
2082 : }
2083 :
2084 0 : int set_pages_array_uc(struct page **pages, int numpages)
2085 : {
2086 0 : return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_UC_MINUS);
2087 : }
2088 : EXPORT_SYMBOL(set_pages_array_uc);
2089 :
2090 0 : int set_pages_array_wc(struct page **pages, int numpages)
2091 : {
2092 0 : return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WC);
2093 : }
2094 : EXPORT_SYMBOL(set_pages_array_wc);
2095 :
2096 0 : int set_pages_array_wt(struct page **pages, int numpages)
2097 : {
2098 0 : return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT);
2099 : }
2100 : EXPORT_SYMBOL_GPL(set_pages_array_wt);
2101 :
2102 0 : int set_pages_wb(struct page *page, int numpages)
2103 : {
2104 0 : unsigned long addr = (unsigned long)page_address(page);
2105 :
2106 0 : return set_memory_wb(addr, numpages);
2107 : }
2108 : EXPORT_SYMBOL(set_pages_wb);
2109 :
2110 0 : int set_pages_array_wb(struct page **pages, int numpages)
2111 : {
2112 0 : int retval;
2113 0 : unsigned long start;
2114 0 : unsigned long end;
2115 0 : int i;
2116 :
2117 : /* WB cache mode is hard wired to all cache attribute bits being 0 */
2118 0 : retval = cpa_clear_pages_array(pages, numpages,
2119 0 : __pgprot(_PAGE_CACHE_MASK));
2120 0 : if (retval)
2121 : return retval;
2122 :
2123 0 : for (i = 0; i < numpages; i++) {
2124 0 : if (PageHighMem(pages[i]))
2125 : continue;
2126 0 : start = page_to_pfn(pages[i]) << PAGE_SHIFT;
2127 0 : end = start + PAGE_SIZE;
2128 0 : memtype_free(start, end);
2129 : }
2130 :
2131 : return 0;
2132 : }
2133 : EXPORT_SYMBOL(set_pages_array_wb);
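/*
 * Editorial example (not part of set_memory.c): a minimal sketch of the
 * array variants, which batch the memtype reservation and attribute change
 * for many non-contiguous pages (as graphics drivers do for page pools).
 * set_pages_array_wb() reverts the attribute and drops the per-page
 * reservations.  The function name and arguments are hypothetical.
 */
static int example_pool_wc_window(struct page **pages, int numpages)
{
        int ret;

        ret = set_pages_array_wc(pages, numpages);      /* reserve + UC- then WC */
        if (ret)
                return ret;

        /* ... the pages are handed out as write-combining buffers ... */

        return set_pages_array_wb(pages, numpages);     /* WB again, reservations freed */
}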
2134 :
2135 0 : int set_pages_ro(struct page *page, int numpages)
2136 : {
2137 0 : unsigned long addr = (unsigned long)page_address(page);
2138 :
2139 0 : return set_memory_ro(addr, numpages);
2140 : }
2141 :
2142 0 : int set_pages_rw(struct page *page, int numpages)
2143 : {
2144 0 : unsigned long addr = (unsigned long)page_address(page);
2145 :
2146 0 : return set_memory_rw(addr, numpages);
2147 : }
2148 :
2149 0 : static int __set_pages_p(struct page *page, int numpages)
2150 : {
2151 0 : unsigned long tempaddr = (unsigned long) page_address(page);
2152 0 : struct cpa_data cpa = { .vaddr = &tempaddr,
2153 : .pgd = NULL,
2154 : .numpages = numpages,
2155 : .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
2156 : .mask_clr = __pgprot(0),
2157 : .flags = 0};
2158 :
2159 : /*
2160 : * No alias checking needed for setting the present flag; otherwise
2161 : * we may need to break large pages for 64-bit kernel text
2162 : * mappings (which adds complexity, especially if we want to do this
2163 : * from atomic context). Let's keep it simple!
2164 : */
2165 0 : return __change_page_attr_set_clr(&cpa, 0);
2166 : }
2167 :
2168 0 : static int __set_pages_np(struct page *page, int numpages)
2169 : {
2170 0 : unsigned long tempaddr = (unsigned long) page_address(page);
2171 0 : struct cpa_data cpa = { .vaddr = &tempaddr,
2172 : .pgd = NULL,
2173 : .numpages = numpages,
2174 : .mask_set = __pgprot(0),
2175 : .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
2176 : .flags = 0};
2177 :
2178 : /*
2179 : * No alias checking needed for clearing the present flag; otherwise
2180 : * we may need to break large pages for 64-bit kernel text
2181 : * mappings (which adds complexity, especially if we want to do this
2182 : * from atomic context). Let's keep it simple!
2183 : */
2184 0 : return __change_page_attr_set_clr(&cpa, 0);
2185 : }
2186 :
2187 0 : int set_direct_map_invalid_noflush(struct page *page)
2188 : {
2189 0 : return __set_pages_np(page, 1);
2190 : }
2191 :
2192 0 : int set_direct_map_default_noflush(struct page *page)
2193 : {
2194 0 : return __set_pages_p(page, 1);
2195 : }
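/*
 * Editorial example (not part of set_memory.c): a minimal sketch of the
 * contract of the _noflush helpers above: they only rewrite the direct-map
 * entry, and the caller is expected to flush the TLB itself, ideally once
 * for a whole batch of pages.  The function name is hypothetical and it
 * handles a single page for brevity.
 */
static int example_remove_from_direct_map(struct page *page)
{
        unsigned long addr = (unsigned long)page_address(page);
        int ret;

        ret = set_direct_map_invalid_noflush(page);     /* clear _PAGE_PRESENT/_PAGE_RW */
        if (ret)
                return ret;

        flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* caller-side TLB flush */
        return 0;
}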
2196 :
2197 : #ifdef CONFIG_DEBUG_PAGEALLOC
2198 : void __kernel_map_pages(struct page *page, int numpages, int enable)
2199 : {
2200 : if (PageHighMem(page))
2201 : return;
2202 : if (!enable) {
2203 : debug_check_no_locks_freed(page_address(page),
2204 : numpages * PAGE_SIZE);
2205 : }
2206 :
2207 : /*
2208 : * The return value is ignored as the calls cannot fail.
2209 : * Large pages for identity mappings are not used at boot time,
2210 : * so no memory allocation is needed when splitting a large page.
2211 : */
2212 : if (enable)
2213 : __set_pages_p(page, numpages);
2214 : else
2215 : __set_pages_np(page, numpages);
2216 :
2217 : /*
2218 : * We should perform an IPI and flush all TLBs, but that can
2219 : * deadlock, so flush only the current CPU's TLB.
2220 : * Preemption needs to be disabled around __flush_tlb_all() due to
2221 : * the CR3 reload in __native_flush_tlb().
2222 : */
2223 : preempt_disable();
2224 : __flush_tlb_all();
2225 : preempt_enable();
2226 :
2227 : arch_flush_lazy_mmu_mode();
2228 : }
2229 : #endif /* CONFIG_DEBUG_PAGEALLOC */
2230 :
2231 0 : bool kernel_page_present(struct page *page)
2232 : {
2233 0 : unsigned int level;
2234 0 : pte_t *pte;
2235 :
2236 0 : if (PageHighMem(page))
2237 : return false;
2238 :
2239 0 : pte = lookup_address((unsigned long)page_address(page), &level);
2240 0 : return (pte_val(*pte) & _PAGE_PRESENT);
2241 : }
2242 :
2243 0 : int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
2244 : unsigned numpages, unsigned long page_flags)
2245 : {
2246 0 : int retval = -EINVAL;
2247 :
2248 0 : struct cpa_data cpa = {
2249 : .vaddr = &address,
2250 : .pfn = pfn,
2251 : .pgd = pgd,
2252 : .numpages = numpages,
2253 : .mask_set = __pgprot(0),
2254 0 : .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
2255 : .flags = 0,
2256 : };
2257 :
2258 0 : WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
2259 :
2260 0 : if (!(__supported_pte_mask & _PAGE_NX))
2261 0 : goto out;
2262 :
2263 0 : if (!(page_flags & _PAGE_ENC))
2264 0 : cpa.mask_clr = pgprot_encrypted(cpa.mask_clr);
2265 :
2266 0 : cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
2267 :
2268 0 : retval = __change_page_attr_set_clr(&cpa, 0);
2269 0 : __flush_tlb_all();
2270 :
2271 0 : out:
2272 0 : return retval;
2273 : }
2274 :
2275 : /*
2276 : * __flush_tlb_all() flushes mappings only on the current CPU and hence this
2277 : * function shouldn't be used in an SMP environment. Presently, it's used only
2278 : * during boot (well before smp_init()) by the EFI subsystem and hence is ok.
2279 : */
2280 0 : int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
2281 : unsigned long numpages)
2282 : {
2283 0 : int retval;
2284 :
2285 : /*
2286 : * The typical sequence for unmapping is to find a pte through
2287 : * lookup_address_in_pgd() (ideally, it should never return NULL because
2288 : * the address is already mapped) and change its protections. As pfn is
2289 : * the *target* of a mapping, it's not useful while unmapping.
2290 : */
2291 0 : struct cpa_data cpa = {
2292 : .vaddr = &address,
2293 : .pfn = 0,
2294 : .pgd = pgd,
2295 : .numpages = numpages,
2296 : .mask_set = __pgprot(0),
2297 : .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
2298 : .flags = 0,
2299 : };
2300 :
2301 0 : WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
2302 :
2303 0 : retval = __change_page_attr_set_clr(&cpa, 0);
2304 0 : __flush_tlb_all();
2305 :
2306 0 : return retval;
2307 : }
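/*
 * Editorial example (not part of set_memory.c): a minimal sketch of a
 * boot-time (pre-SMP) user such as the EFI code mapping a physical region
 * into its private page table and unmapping it again.  The pgd, addresses
 * and flags below are hypothetical.
 */
static int __init example_map_boot_region(pgd_t *pgd, u64 phys,
                                          unsigned long virt,
                                          unsigned long numpages)
{
        int ret;

        /* Map as read-write, non-executable data. */
        ret = kernel_map_pages_in_pgd(pgd, phys >> PAGE_SHIFT, virt,
                                      numpages, _PAGE_RW | _PAGE_NX);
        if (ret)
                return ret;

        /* ... boot-time firmware calls run with this mapping ... */

        return kernel_unmap_pages_in_pgd(pgd, virt, numpages);
}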
2308 :
2309 : /*
2310 : * The testcases use internal knowledge of the implementation that shouldn't
2311 : * be exposed to the rest of the kernel. Include these directly here.
2312 : */
2313 : #ifdef CONFIG_CPA_DEBUG
2314 : #include "cpa-test.c"
2315 : #endif