Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * This file contains KASAN runtime code that manages shadow memory for
4 : * generic and software tag-based KASAN modes.
5 : *
6 : * Copyright (c) 2014 Samsung Electronics Co., Ltd.
7 : * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
8 : *
9 : * Some code borrowed from https://github.com/xairy/kasan-prototype by
10 : * Andrey Konovalov <andreyknvl@gmail.com>
11 : */
12 :
13 : #include <linux/init.h>
14 : #include <linux/kasan.h>
15 : #include <linux/kernel.h>
16 : #include <linux/kfence.h>
17 : #include <linux/kmemleak.h>
18 : #include <linux/memory.h>
19 : #include <linux/mm.h>
20 : #include <linux/string.h>
21 : #include <linux/types.h>
22 : #include <linux/vmalloc.h>
23 :
24 : #include <asm/cacheflush.h>
25 : #include <asm/tlbflush.h>
26 :
27 : #include "kasan.h"
28 :
29 284647233 : bool __kasan_check_read(const volatile void *p, unsigned int size)
30 : {
31 284647233 : return kasan_check_range((unsigned long)p, size, false, _RET_IP_);
32 : }
33 : EXPORT_SYMBOL(__kasan_check_read);
34 :
35 41250310 : bool __kasan_check_write(const volatile void *p, unsigned int size)
36 : {
37 41250310 : return kasan_check_range((unsigned long)p, size, true, _RET_IP_);
38 : }
39 : EXPORT_SYMBOL(__kasan_check_write);
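/*
 * Illustrative sketch, not part of the original file: code that touches
 * memory through a non-instrumented path (raw asm helpers, __mem* calls)
 * can validate the range explicitly through the hooks above. The wrapper
 * below and its name are hypothetical; only the __kasan_check_read() /
 * __kasan_check_write() calls are real.
 */
static inline void example_checked_copy(void *dst, const void *src, size_t len)
{
	/* Both checks print a KASAN report and return false on a bad range. */
	if (!__kasan_check_read(src, len) || !__kasan_check_write(dst, len))
		return;

	__memcpy(dst, src, len);	/* raw copy, bypasses instrumentation */
}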
40 :
41 : #undef memset
42 4629157 : void *memset(void *addr, int c, size_t len)
43 : {
44 4629157 : if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_))
45 : return NULL;
46 :
47 4637324 : return __memset(addr, c, len);
48 : }
49 :
50 : #ifdef __HAVE_ARCH_MEMMOVE
51 : #undef memmove
52 2055 : void *memmove(void *dest, const void *src, size_t len)
53 : {
54 4110 : if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
55 2055 : !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
56 0 : return NULL;
57 :
58 2055 : return __memmove(dest, src, len);
59 : }
60 : #endif
61 :
62 : #undef memcpy
63 780770 : void *memcpy(void *dest, const void *src, size_t len)
64 : {
65 1561582 : if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
66 780782 : !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
67 0 : return NULL;
68 :
69 780812 : return __memcpy(dest, src, len);
70 : }
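/*
 * Example of what the interceptors above catch: given
 *
 *	char *p = kmalloc(8, GFP_KERNEL);
 *	memset(p, 0, 16);
 *
 * kasan_check_range() finds poisoned shadow for bytes p + 8..15 (the
 * redzone following the 8-byte object), prints an out-of-bounds report,
 * and the wrapper returns NULL without calling __memset().
 */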
71 :
72 3496550 : void kasan_poison(const void *addr, size_t size, u8 value)
73 : {
74 3496550 : void *shadow_start, *shadow_end;
75 :
76 : /*
77 : * Perform shadow offset calculation based on untagged address, as
78 : * some of the callers (e.g. kasan_poison_object_data) pass tagged
79 : * addresses to this function.
80 : */
81 3496550 : addr = kasan_reset_tag(addr);
82 :
83 : /* Skip KFENCE memory if called explicitly outside of sl*b. */
84 3496550 : if (is_kfence_address(addr))
85 : return;
86 :
87 3496550 : if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
88 : return;
89 3496550 : if (WARN_ON(size & KASAN_GRANULE_MASK))
90 : return;
91 :
92 3496550 : shadow_start = kasan_mem_to_shadow(addr);
93 3496550 : shadow_end = kasan_mem_to_shadow(addr + size);
94 :
95 3496550 : __memset(shadow_start, value, shadow_end - shadow_start);
96 : }
97 : EXPORT_SYMBOL(kasan_poison);
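/*
 * For reference, the address-to-shadow mapping that kasan_poison() relies
 * on (the real helper, kasan_mem_to_shadow(), lives in <linux/kasan.h>):
 * each aligned KASAN_GRANULE_SIZE-byte chunk of kernel memory is described
 * by a single shadow byte at
 *
 *	shadow = (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
 *
 * so with generic KASAN (KASAN_SHADOW_SCALE_SHIFT == 3), poisoning a
 * 64-byte object writes 64 >> 3 == 8 shadow bytes.
 */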
98 :
99 : #ifdef CONFIG_KASAN_GENERIC
100 1916632 : void kasan_poison_last_granule(const void *addr, size_t size)
101 : {
102 98929 : if (size & KASAN_GRANULE_MASK) {
103 30203 : u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size);
104 30203 : *shadow = size & KASAN_GRANULE_MASK;
105 : }
106 98929 : }
107 : #endif
108 :
109 1818011 : void kasan_unpoison(const void *addr, size_t size)
110 : {
111 1818011 : u8 tag = get_tag(addr);
112 :
113 : /*
114 : * Perform shadow offset calculation based on untagged address, as
115 : * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
116 : * addresses to this function.
117 : */
118 1818011 : addr = kasan_reset_tag(addr);
119 :
120 : /*
121 : * Skip KFENCE memory if called explicitly outside of sl*b. Also note
122 : * that calls to ksize(), where size is not a multiple of machine-word
123 : * size, would otherwise poison the invalid portion of the word.
124 : */
125 1818011 : if (is_kfence_address(addr))
126 : return;
127 :
128 1818011 : if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
129 : return;
130 :
131 : /* Unpoison all granules that cover the object. */
132 1818011 : kasan_poison(addr, round_up(size, KASAN_GRANULE_SIZE), tag);
133 :
134 : /* Partially poison the last granule for the generic mode. */
135 1817703 : if (IS_ENABLED(CONFIG_KASAN_GENERIC))
136 1817703 : kasan_poison_last_granule(addr, size);
137 : }
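/*
 * Worked example for generic KASAN (KASAN_GRANULE_SIZE == 8): unpoisoning
 * a 13-byte object first clears the shadow for round_up(13, 8) == 16
 * bytes, i.e. two whole granules, and kasan_poison_last_granule() then
 * stores 13 & 7 == 5 in the second shadow byte, meaning "only the first
 * 5 bytes of this granule are accessible". An access to byte 13, 14 or 15
 * is therefore still reported.
 */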
138 :
139 : #ifdef CONFIG_MEMORY_HOTPLUG
140 : static bool shadow_mapped(unsigned long addr)
141 : {
142 : pgd_t *pgd = pgd_offset_k(addr);
143 : p4d_t *p4d;
144 : pud_t *pud;
145 : pmd_t *pmd;
146 : pte_t *pte;
147 :
148 : if (pgd_none(*pgd))
149 : return false;
150 : p4d = p4d_offset(pgd, addr);
151 : if (p4d_none(*p4d))
152 : return false;
153 : pud = pud_offset(p4d, addr);
154 : if (pud_none(*pud))
155 : return false;
156 :
157 : /*
158 : * We can't use pud_large() or pud_huge(): the first one is
159 : * arch-specific, and the last one depends on HUGETLB_PAGE. So let's abuse
160 : * pud_bad(): if the pud is bad, it's bad because it's huge.
161 : */
162 : if (pud_bad(*pud))
163 : return true;
164 : pmd = pmd_offset(pud, addr);
165 : if (pmd_none(*pmd))
166 : return false;
167 :
168 : if (pmd_bad(*pmd))
169 : return true;
170 : pte = pte_offset_kernel(pmd, addr);
171 : return !pte_none(*pte);
172 : }
173 :
174 : static int __meminit kasan_mem_notifier(struct notifier_block *nb,
175 : unsigned long action, void *data)
176 : {
177 : struct memory_notify *mem_data = data;
178 : unsigned long nr_shadow_pages, start_kaddr, shadow_start;
179 : unsigned long shadow_end, shadow_size;
180 :
181 : nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
182 : start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
183 : shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
184 : shadow_size = nr_shadow_pages << PAGE_SHIFT;
185 : shadow_end = shadow_start + shadow_size;
186 :
187 : if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) ||
188 : WARN_ON(start_kaddr % KASAN_MEMORY_PER_SHADOW_PAGE))
189 : return NOTIFY_BAD;
190 :
191 : switch (action) {
192 : case MEM_GOING_ONLINE: {
193 : void *ret;
194 :
195 : /*
196 : * If the shadow is already mapped, then it must have been mapped
197 : * during boot. This can happen when we are onlining previously
198 : * offlined memory.
199 : */
200 : if (shadow_mapped(shadow_start))
201 : return NOTIFY_OK;
202 :
203 : ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
204 : shadow_end, GFP_KERNEL,
205 : PAGE_KERNEL, VM_NO_GUARD,
206 : pfn_to_nid(mem_data->start_pfn),
207 : __builtin_return_address(0));
208 : if (!ret)
209 : return NOTIFY_BAD;
210 :
211 : kmemleak_ignore(ret);
212 : return NOTIFY_OK;
213 : }
214 : case MEM_CANCEL_ONLINE:
215 : case MEM_OFFLINE: {
216 : struct vm_struct *vm;
217 :
218 : /*
219 : * shadow_start was either mapped during boot by kasan_init()
220 : * or during memory online by __vmalloc_node_range().
221 : * In the latter case we can use vfree() to free shadow.
222 : * Non-NULL result of the find_vm_area() will tell us if
223 : * that was the second case.
224 : *
225 : * Currently it's not possible to free shadow mapped
226 : * during boot by kasan_init(). It's because the code
227 : * to do that hasn't been written yet. So we'll just
228 : * leak the memory.
229 : */
230 : vm = find_vm_area((void *)shadow_start);
231 : if (vm)
232 : vfree((void *)shadow_start);
233 : }
234 : }
235 :
236 : return NOTIFY_OK;
237 : }
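/*
 * Worked example of the sizing above, assuming 4 KiB pages and generic
 * KASAN (KASAN_SHADOW_SCALE_SHIFT == 3): onlining a 128 MiB block
 * (32768 pages) requires 32768 >> 3 == 4096 shadow pages, i.e. a 16 MiB
 * VM_NO_GUARD mapping starting at kasan_mem_to_shadow(start_kaddr).
 */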
238 :
239 : static int __init kasan_memhotplug_init(void)
240 : {
241 : hotplug_memory_notifier(kasan_mem_notifier, 0);
242 :
243 : return 0;
244 : }
245 :
246 : core_initcall(kasan_memhotplug_init);
247 : #endif
248 :
249 : #ifdef CONFIG_KASAN_VMALLOC
250 :
251 13023 : static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
252 : void *unused)
253 : {
254 13023 : unsigned long page;
255 13023 : pte_t pte;
256 :
257 13023 : if (likely(!pte_none(*ptep)))
258 : return 0;
259 :
260 3064 : page = __get_free_page(GFP_KERNEL);
261 3064 : if (!page)
262 : return -ENOMEM;
263 :
264 3064 : memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
265 3064 : pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
266 :
267 3064 : spin_lock(&init_mm.page_table_lock);
268 3064 : if (likely(pte_none(*ptep))) {
269 3064 : set_pte_at(&init_mm, addr, ptep, pte);
270 3064 : page = 0;
271 : }
272 3064 : spin_unlock(&init_mm.page_table_lock);
273 3064 : if (page)
274 0 : free_page(page);
275 : return 0;
276 : }
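/*
 * Note on the locking in kasan_populate_vmalloc_pte(): the first
 * pte_none() check runs without any lock purely as a fast path. It is
 * repeated under init_mm.page_table_lock so that two CPUs populating the
 * same shadow page cannot both install one; the loser keeps a non-zero
 * "page" and frees it instead.
 */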
277 :
278 11755 : int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
279 : {
280 11755 : unsigned long shadow_start, shadow_end;
281 11755 : int ret;
282 :
283 11755 : if (!is_vmalloc_or_module_addr((void *)addr))
284 : return 0;
285 :
286 11755 : shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
287 11755 : shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
288 11755 : shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
289 11755 : shadow_end = ALIGN(shadow_end, PAGE_SIZE);
290 :
291 11755 : ret = apply_to_page_range(&init_mm, shadow_start,
292 : shadow_end - shadow_start,
293 : kasan_populate_vmalloc_pte, NULL);
294 11755 : if (ret)
295 0 : return ret;
296 :
297 11755 : flush_cache_vmap(shadow_start, shadow_end);
298 :
299 : /*
300 : * We need to be careful about inter-cpu effects here. Consider:
301 : *
302 : * CPU#0 CPU#1
303 : * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ;
304 : * p[99] = 1;
305 : *
306 : * With compiler instrumentation, that ends up looking like this:
307 : *
308 : * CPU#0 CPU#1
309 : * // vmalloc() allocates memory
310 : * // let a = area->addr
311 : * // we reach kasan_populate_vmalloc
312 : * // and call kasan_unpoison:
313 : * STORE shadow(a), unpoison_val
314 : * ...
315 : * STORE shadow(a+99), unpoison_val x = LOAD p
316 : * // rest of vmalloc process <data dependency>
317 : * STORE p, a LOAD shadow(x+99)
318 : *
319 : * If there is no barrier between the end of unpoisoning the shadow
320 : * and the store of the result to p, the stores could be committed
321 : * in a different order by CPU#0, and CPU#1 could erroneously observe
322 : * poison in the shadow.
323 : *
324 : * We need some sort of barrier between the stores.
325 : *
326 : * In the vmalloc() case, this is provided by a smp_wmb() in
327 : * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
328 : * get_vm_area() and friends, the caller gets shadow allocated but
329 : * doesn't have any pages mapped into the virtual address space that
330 : * has been reserved. Mapping those pages in will involve taking and
331 : * releasing a page-table lock, which will provide the barrier.
332 : */
333 :
334 : return 0;
335 : }
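/*
 * A minimal sketch of the publication pattern described above (p, area
 * and x are illustrative names, not taken from this file):
 *
 *	// writer (vmalloc() path)
 *	kasan_unpoison(area->addr, size);  // stores to the shadow
 *	smp_wmb();                         // in clear_vm_uninitialized_flag()
 *	WRITE_ONCE(p, area->addr);         // publish the pointer
 *
 *	// reader
 *	x = READ_ONCE(p);                  // observe the published pointer
 *	x[99] = 1;                         // the instrumented shadow load is
 *	                                   // ordered after READ_ONCE() by the
 *	                                   // address dependency
 */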
336 :
337 : /*
338 : * Poison the shadow for a vmalloc region. Called as part of the
339 : * freeing process at the time the region is freed.
340 : */
341 11736 : void kasan_poison_vmalloc(const void *start, unsigned long size)
342 : {
343 11736 : if (!is_vmalloc_or_module_addr(start))
344 : return;
345 :
346 11736 : size = round_up(size, KASAN_GRANULE_SIZE);
347 11736 : kasan_poison(start, size, KASAN_VMALLOC_INVALID);
348 : }
349 :
350 11753 : void kasan_unpoison_vmalloc(const void *start, unsigned long size)
351 : {
352 11753 : if (!is_vmalloc_or_module_addr(start))
353 : return;
354 :
355 11753 : kasan_unpoison(start, size);
356 : }
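/*
 * Putting the vmalloc helpers together: kasan_populate_vmalloc() backs
 * the shadow of a newly reserved region with pages,
 * kasan_unpoison_vmalloc() marks the region accessible when it is handed
 * out, kasan_poison_vmalloc() re-poisons it when the region is freed, and
 * kasan_release_vmalloc() below lazily frees the shadow backing pages once
 * the surrounding free region allows it.
 */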
357 :
358 65 : static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
359 : void *unused)
360 : {
361 65 : unsigned long page;
362 :
363 65 : page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);
364 :
365 65 : spin_lock(&init_mm.page_table_lock);
366 :
367 65 : if (likely(!pte_none(*ptep))) {
368 65 : pte_clear(&init_mm, addr, ptep);
369 65 : free_page(page);
370 : }
371 65 : spin_unlock(&init_mm.page_table_lock);
372 :
373 65 : return 0;
374 : }
375 :
376 : /*
377 : * Release the backing for the vmalloc region [start, end), which
378 : * lies within the free region [free_region_start, free_region_end).
379 : *
380 : * This can be run lazily, long after the region was freed. It runs
381 : * under free_vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
382 : * infrastructure.
383 : *
384 : * How does this work?
385 : * -------------------
386 : *
387 : * We have a region that is page aligned, labelled as A.
388 : * That might not map onto the shadow in a way that is page-aligned:
389 : *
390 : * start end
391 : * v v
392 : * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
393 : * -------- -------- -------- -------- --------
394 : * | | | | |
395 : * | | | /-------/ |
396 : * \-------\|/------/ |/---------------/
397 : * ||| ||
398 : * |??AAAAAA|AAAAAAAA|AA??????| < shadow
399 : * (1) (2) (3)
400 : *
401 : * First we align the start upwards and the end downwards, so that the
402 : * shadow of the region aligns with shadow page boundaries. In the
403 : * example, this gives us the shadow page (2). This is the shadow entirely
404 : * covered by this allocation.
405 : *
406 : * Then we have the tricky bits. We want to know if we can free the
407 : * partially covered shadow pages - (1) and (3) in the example. For this,
408 : * we are given the start and end of the free region that contains this
409 : * allocation. Extending our previous example, we could have:
410 : *
411 : * free_region_start free_region_end
412 : * | start end |
413 : * v v v v
414 : * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
415 : * -------- -------- -------- -------- --------
416 : * | | | | |
417 : * | | | /-------/ |
418 : * \-------\|/------/ |/---------------/
419 : * ||| ||
420 : * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow
421 : * (1) (2) (3)
422 : *
423 : * Once again, we align the start of the free region up, and the end of
424 : * the free region down so that the shadow is page aligned. So we can free
425 : * page (1) - we know no allocation currently uses anything in that page,
426 : * because all of it is in the vmalloc free region. But we cannot free
427 : * page (3), because we can't be sure that the rest of it is unused.
428 : *
429 : * We only consider pages that contain part of the original region for
430 : * freeing: we don't try to free other pages from the free region or we'd
431 : * end up trying to free huge chunks of virtual address space.
432 : *
433 : * Concurrency
434 : * -----------
435 : *
436 : * How do we know that we're not freeing a page that is simultaneously
437 : * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
438 : *
439 : * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
440 : * at the same time. While we run under free_vmap_area_lock, the population
441 : * code does not.
442 : *
443 : * free_vmap_area_lock instead operates to ensure that the larger range
444 : * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
445 : * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
446 : * no space identified as free will become used while we are running. This
447 : * means that so long as we are careful with alignment and only free shadow
448 : * pages entirely covered by the free region, we will not run into any
449 : * trouble - any simultaneous allocations will be for disjoint regions.
450 : */
451 9 : void kasan_release_vmalloc(unsigned long start, unsigned long end,
452 : unsigned long free_region_start,
453 : unsigned long free_region_end)
454 : {
455 9 : void *shadow_start, *shadow_end;
456 9 : unsigned long region_start, region_end;
457 9 : unsigned long size;
458 :
459 9 : region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
460 9 : region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);
461 :
462 9 : free_region_start = ALIGN(free_region_start, KASAN_MEMORY_PER_SHADOW_PAGE);
463 :
464 9 : if (start != region_start &&
465 9 : free_region_start < region_start)
466 0 : region_start -= KASAN_MEMORY_PER_SHADOW_PAGE;
467 :
468 9 : free_region_end = ALIGN_DOWN(free_region_end, KASAN_MEMORY_PER_SHADOW_PAGE);
469 :
470 9 : if (end != region_end &&
471 9 : free_region_end > region_end)
472 2 : region_end += KASAN_MEMORY_PER_SHADOW_PAGE;
473 :
474 9 : shadow_start = kasan_mem_to_shadow((void *)region_start);
475 9 : shadow_end = kasan_mem_to_shadow((void *)region_end);
476 :
477 9 : if (shadow_end > shadow_start) {
478 7 : size = shadow_end - shadow_start;
479 7 : apply_to_existing_page_range(&init_mm,
480 : (unsigned long)shadow_start,
481 : size, kasan_depopulate_vmalloc_pte,
482 : NULL);
483 7 : flush_tlb_kernel_range((unsigned long)shadow_start,
484 : (unsigned long)shadow_end);
485 : }
486 9 : }
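/*
 * Worked example of the alignment above, assuming 4 KiB pages and generic
 * KASAN, so KASAN_MEMORY_PER_SHADOW_PAGE == 8 * 4 KiB == 32 KiB (addresses
 * below are made up for illustration): releasing
 * [0xffffc90000005000, 0xffffc90000011000) rounds region_start up to
 * 0xffffc90000008000 and region_end down to 0xffffc90000010000, so only
 * the shadow page describing that middle 32 KiB is freed unconditionally.
 * The partially covered shadow pages on either side are freed only when
 * [free_region_start, free_region_end) spans the full 32 KiB of memory
 * that each of them describes.
 */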
487 :
488 : #else /* CONFIG_KASAN_VMALLOC */
489 :
490 : int kasan_module_alloc(void *addr, size_t size)
491 : {
492 : void *ret;
493 : size_t scaled_size;
494 : size_t shadow_size;
495 : unsigned long shadow_start;
496 :
497 : shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
498 : scaled_size = (size + KASAN_GRANULE_SIZE - 1) >>
499 : KASAN_SHADOW_SCALE_SHIFT;
500 : shadow_size = round_up(scaled_size, PAGE_SIZE);
501 :
502 : if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
503 : return -EINVAL;
504 :
505 : ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
506 : shadow_start + shadow_size,
507 : GFP_KERNEL,
508 : PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
509 : __builtin_return_address(0));
510 :
511 : if (ret) {
512 : __memset(ret, KASAN_SHADOW_INIT, shadow_size);
513 : find_vm_area(addr)->flags |= VM_KASAN;
514 : kmemleak_ignore(ret);
515 : return 0;
516 : }
517 :
518 : return -ENOMEM;
519 : }
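/*
 * Worked example of the sizing above (generic KASAN,
 * KASAN_SHADOW_SCALE_SHIFT == 3, 4 KiB pages): a 100000-byte module
 * allocation needs (100000 + 7) >> 3 == 12500 shadow bytes, which
 * round_up() turns into a 16 KiB (four page) VM_NO_GUARD mapping placed
 * at kasan_mem_to_shadow(addr).
 */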
520 :
521 : void kasan_free_shadow(const struct vm_struct *vm)
522 : {
523 : if (vm->flags & VM_KASAN)
524 : vfree(kasan_mem_to_shadow(vm->addr));
525 : }
526 :
527 : #endif