Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * SLUB: A slab allocator that limits cache line use instead of queuing
4 : * objects in per cpu and per node lists.
5 : *
6 : * The allocator synchronizes using per slab locks or atomic operatios
7 : * and only uses a centralized lock to manage a pool of partial slabs.
8 : *
9 : * (C) 2007 SGI, Christoph Lameter
10 : * (C) 2011 Linux Foundation, Christoph Lameter
11 : */
12 :
13 : #include <linux/mm.h>
14 : #include <linux/swap.h> /* struct reclaim_state */
15 : #include <linux/module.h>
16 : #include <linux/bit_spinlock.h>
17 : #include <linux/interrupt.h>
18 : #include <linux/bitops.h>
19 : #include <linux/slab.h>
20 : #include "slab.h"
21 : #include <linux/proc_fs.h>
22 : #include <linux/seq_file.h>
23 : #include <linux/kasan.h>
24 : #include <linux/cpu.h>
25 : #include <linux/cpuset.h>
26 : #include <linux/mempolicy.h>
27 : #include <linux/ctype.h>
28 : #include <linux/debugobjects.h>
29 : #include <linux/kallsyms.h>
30 : #include <linux/kfence.h>
31 : #include <linux/memory.h>
32 : #include <linux/math64.h>
33 : #include <linux/fault-inject.h>
34 : #include <linux/stacktrace.h>
35 : #include <linux/prefetch.h>
36 : #include <linux/memcontrol.h>
37 : #include <linux/random.h>
38 :
39 : #include <trace/events/kmem.h>
40 :
41 : #include "internal.h"
42 :
43 : /*
44 : * Lock order:
45 : * 1. slab_mutex (Global Mutex)
46 : * 2. node->list_lock
47 : * 3. slab_lock(page) (Only on some arches and for debugging)
48 : *
49 : * slab_mutex
50 : *
51 : * The role of the slab_mutex is to protect the list of all the slabs
52 : * and to synchronize major metadata changes to slab cache structures.
53 : *
54 : * The slab_lock is only used for debugging and on arches that do not
55 : * have the ability to do a cmpxchg_double. It only protects:
56 : * A. page->freelist -> List of object free in a page
57 : * B. page->inuse -> Number of objects in use
58 : * C. page->objects -> Number of objects in page
59 : * D. page->frozen -> frozen state
60 : *
61 : * If a slab is frozen then it is exempt from list management. It is not
62 : * on any list except per cpu partial list. The processor that froze the
63 : * slab is the one who can perform list operations on the page. Other
64 : * processors may put objects onto the freelist but the processor that
65 : * froze the slab is the only one that can retrieve the objects from the
66 : * page's freelist.
67 : *
68 : * The list_lock protects the partial and full list on each node and
69 : * the partial slab counter. If taken then no new slabs may be added or
70 : * removed from the lists nor make the number of partial slabs be modified.
71 : * (Note that the total number of slabs is an atomic value that may be
72 : * modified without taking the list lock).
73 : *
74 : * The list_lock is a centralized lock and thus we avoid taking it as
75 : * much as possible. As long as SLUB does not have to handle partial
76 : * slabs, operations can continue without any centralized lock. F.e.
77 : * allocating a long series of objects that fill up slabs does not require
78 : * the list lock.
79 : * Interrupts are disabled during allocation and deallocation in order to
80 : * make the slab allocator safe to use in the context of an irq. In addition
81 : * interrupts are disabled to ensure that the processor does not change
82 : * while handling per_cpu slabs, due to kernel preemption.
83 : *
84 : * SLUB assigns one slab for allocation to each processor.
85 : * Allocations only occur from these slabs called cpu slabs.
86 : *
87 : * Slabs with free elements are kept on a partial list and during regular
88 : * operations no list for full slabs is used. If an object in a full slab is
89 : * freed then the slab will show up again on the partial lists.
90 : * We track full slabs for debugging purposes though because otherwise we
91 : * cannot scan all objects.
92 : *
93 : * Slabs are freed when they become empty. Teardown and setup is
94 : * minimal so we rely on the page allocators per cpu caches for
95 : * fast frees and allocs.
96 : *
97 : * page->frozen The slab is frozen and exempt from list processing.
98 : * This means that the slab is dedicated to a purpose
99 : * such as satisfying allocations for a specific
100 : * processor. Objects may be freed in the slab while
101 : * it is frozen but slab_free will then skip the usual
102 : * list operations. It is up to the processor holding
103 : * the slab to integrate the slab into the slab lists
104 : * when the slab is no longer needed.
105 : *
106 : * One use of this flag is to mark slabs that are
107 : * used for allocations. Then such a slab becomes a cpu
108 : * slab. The cpu slab may be equipped with an additional
109 : * freelist that allows lockless access to
110 : * free objects in addition to the regular freelist
111 : * that requires the slab lock.
112 : *
113 : * SLAB_DEBUG_FLAGS Slab requires special handling due to debug
114 : * options set. This moves slab handling out of
115 : * the fast path and disables lockless freelists.
116 : */
117 :
118 : #ifdef CONFIG_SLUB_DEBUG
119 : #ifdef CONFIG_SLUB_DEBUG_ON
120 : DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
121 : #else
122 : DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
123 : #endif
124 : #endif
125 :
126 1459315 : static inline bool kmem_cache_debug(struct kmem_cache *s)
127 : {
128 1459315 : return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
129 : }
130 :
131 2057770 : void *fixup_red_left(struct kmem_cache *s, void *p)
132 : {
133 2057770 : if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
134 0 : p += s->red_left_pad;
135 :
136 2056413 : return p;
137 : }
138 :
139 642106 : static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
140 : {
141 : #ifdef CONFIG_SLUB_CPU_PARTIAL
142 : return !kmem_cache_debug(s);
143 : #else
144 642106 : return false;
145 : #endif
146 : }
147 :
148 : /*
149 : * Issues still to be resolved:
150 : *
151 : * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
152 : *
153 : * - Variable sizing of the per node arrays
154 : */
155 :
156 : /* Enable to test recovery from slab corruption on boot */
157 : #undef SLUB_RESILIENCY_TEST
158 :
159 : /* Enable to log cmpxchg failures */
160 : #undef SLUB_DEBUG_CMPXCHG
161 :
162 : /*
163 : * Mininum number of partial slabs. These will be left on the partial
164 : * lists even if they are empty. kmem_cache_shrink may reclaim them.
165 : */
166 : #define MIN_PARTIAL 5
167 :
168 : /*
169 : * Maximum number of desirable partial slabs.
170 : * The existence of more partial slabs makes kmem_cache_shrink
171 : * sort the partial list by the number of objects in use.
172 : */
173 : #define MAX_PARTIAL 10
174 :
175 : #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
176 : SLAB_POISON | SLAB_STORE_USER)
177 :
178 : /*
179 : * These debug flags cannot use CMPXCHG because there might be consistency
180 : * issues when checking or reading debug information
181 : */
182 : #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
183 : SLAB_TRACE)
184 :
185 :
186 : /*
187 : * Debugging flags that require metadata to be stored in the slab. These get
188 : * disabled when slub_debug=O is used and a cache's min order increases with
189 : * metadata.
190 : */
191 : #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
192 :
193 : #define OO_SHIFT 16
194 : #define OO_MASK ((1 << OO_SHIFT) - 1)
195 : #define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
196 :
197 : /* Internal SLUB flags */
198 : /* Poison object */
199 : #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
200 : /* Use cmpxchg_double */
201 : #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
202 :
203 : /*
204 : * Tracking user of a slab.
205 : */
206 : #define TRACK_ADDRS_COUNT 16
207 : struct track {
208 : unsigned long addr; /* Called from address */
209 : #ifdef CONFIG_STACKTRACE
210 : unsigned long addrs[TRACK_ADDRS_COUNT]; /* Called from address */
211 : #endif
212 : int cpu; /* Was running on cpu */
213 : int pid; /* Pid context */
214 : unsigned long when; /* When did the operation occur */
215 : };
216 :
217 : enum track_item { TRACK_ALLOC, TRACK_FREE };
218 :
219 : #ifdef CONFIG_SYSFS
220 : static int sysfs_slab_add(struct kmem_cache *);
221 : static int sysfs_slab_alias(struct kmem_cache *, const char *);
222 : #else
223 : static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
224 : static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
225 : { return 0; }
226 : #endif
227 :
228 2158536 : static inline void stat(const struct kmem_cache *s, enum stat_item si)
229 : {
230 : #ifdef CONFIG_SLUB_STATS
231 : /*
232 : * The rmw is racy on a preemptible kernel but this is acceptable, so
233 : * avoid this_cpu_add()'s irq-disable overhead.
234 : */
235 : raw_cpu_inc(s->cpu_slab->stat[si]);
236 : #endif
237 2158536 : }
238 :
239 : /*
240 : * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
241 : * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
242 : * differ during memory hotplug/hotremove operations.
243 : * Protected by slab_mutex.
244 : */
245 : static nodemask_t slab_nodes;
246 :
247 : /********************************************************************
248 : * Core slab cache functions
249 : *******************************************************************/
250 :
251 : /*
252 : * Returns freelist pointer (ptr). With hardening, this is obfuscated
253 : * with an XOR of the address where the pointer is held and a per-cache
254 : * random number.
255 : */
256 4558318 : static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
257 : unsigned long ptr_addr)
258 : {
259 : #ifdef CONFIG_SLAB_FREELIST_HARDENED
260 : /*
261 : * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
262 : * Normally, this doesn't cause any issues, as both set_freepointer()
263 : * and get_freepointer() are called with a pointer with the same tag.
264 : * However, there are some issues with CONFIG_SLUB_DEBUG code. For
265 : * example, when __free_slub() iterates over objects in a cache, it
266 : * passes untagged pointers to check_object(). check_object() in turns
267 : * calls get_freepointer() with an untagged pointer, which causes the
268 : * freepointer to be restored incorrectly.
269 : */
270 : return (void *)((unsigned long)ptr ^ s->random ^
271 : swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
272 : #else
273 4558318 : return ptr;
274 : #endif
275 : }
276 :
277 : /* Returns the freelist pointer recorded at location ptr_addr. */
278 2774985 : static inline void *freelist_dereference(const struct kmem_cache *s,
279 : void *ptr_addr)
280 : {
281 2774985 : return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
282 : (unsigned long)ptr_addr);
283 : }
284 :
285 2774985 : static inline void *get_freepointer(struct kmem_cache *s, void *object)
286 : {
287 2774985 : object = kasan_reset_tag(object);
288 0 : return freelist_dereference(s, object + s->offset);
289 : }
290 :
291 1242978 : static void prefetch_freepointer(const struct kmem_cache *s, void *object)
292 : {
293 1242978 : prefetch(object + s->offset);
294 1242999 : }
295 :
296 1242969 : static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
297 : {
298 1242969 : unsigned long freepointer_addr;
299 1242969 : void *p;
300 :
301 1242969 : if (!debug_pagealloc_enabled_static())
302 1242969 : return get_freepointer(s, object);
303 :
304 : freepointer_addr = (unsigned long)object + s->offset;
305 : copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
306 : return freelist_ptr(s, p, freepointer_addr);
307 : }
308 :
309 1783333 : static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
310 : {
311 1783333 : unsigned long freeptr_addr = (unsigned long)object + s->offset;
312 :
313 : #ifdef CONFIG_SLAB_FREELIST_HARDENED
314 : BUG_ON(object == fp); /* naive detection of double free or corruption */
315 : #endif
316 :
317 1783333 : freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
318 1783333 : *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
319 0 : }
320 :
321 : /* Loop over all objects in a slab */
322 : #define for_each_object(__p, __s, __addr, __objects) \
323 : for (__p = fixup_red_left(__s, __addr); \
324 : __p < (__addr) + (__objects) * (__s)->size; \
325 : __p += (__s)->size)
326 :
327 585 : static inline unsigned int order_objects(unsigned int order, unsigned int size)
328 : {
329 585 : return ((unsigned int)PAGE_SIZE << order) / size;
330 : }
331 :
332 290 : static inline struct kmem_cache_order_objects oo_make(unsigned int order,
333 : unsigned int size)
334 : {
335 290 : struct kmem_cache_order_objects x = {
336 290 : (order << OO_SHIFT) + order_objects(order, size)
337 : };
338 :
339 290 : return x;
340 : }
341 :
342 82044 : static inline unsigned int oo_order(struct kmem_cache_order_objects x)
343 : {
344 82044 : return x.x >> OO_SHIFT;
345 : }
346 :
347 28434 : static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
348 : {
349 28434 : return x.x & OO_MASK;
350 : }
351 :
352 : /*
353 : * Per slab locking using the pagelock
354 : */
355 0 : static __always_inline void slab_lock(struct page *page)
356 : {
357 0 : VM_BUG_ON_PAGE(PageTail(page), page);
358 0 : bit_spin_lock(PG_locked, &page->flags);
359 : }
360 :
361 0 : static __always_inline void slab_unlock(struct page *page)
362 : {
363 0 : VM_BUG_ON_PAGE(PageTail(page), page);
364 0 : __bit_spin_unlock(PG_locked, &page->flags);
365 0 : }
366 :
367 : /* Interrupts must be disabled (for the fallback code to work right) */
368 422976 : static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
369 : void *freelist_old, unsigned long counters_old,
370 : void *freelist_new, unsigned long counters_new,
371 : const char *n)
372 : {
373 422976 : VM_BUG_ON(!irqs_disabled());
374 : #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
375 : defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
376 422976 : if (s->flags & __CMPXCHG_DOUBLE) {
377 422976 : if (cmpxchg_double(&page->freelist, &page->counters,
378 : freelist_old, counters_old,
379 : freelist_new, counters_new))
380 : return true;
381 : } else
382 : #endif
383 : {
384 0 : slab_lock(page);
385 0 : if (page->freelist == freelist_old &&
386 0 : page->counters == counters_old) {
387 0 : page->freelist = freelist_new;
388 0 : page->counters = counters_new;
389 0 : slab_unlock(page);
390 0 : return true;
391 : }
392 0 : slab_unlock(page);
393 : }
394 :
395 0 : cpu_relax();
396 0 : stat(s, CMPXCHG_DOUBLE_FAIL);
397 :
398 : #ifdef SLUB_DEBUG_CMPXCHG
399 : pr_info("%s %s: cmpxchg double redo ", n, s->name);
400 : #endif
401 :
402 0 : return false;
403 : }
404 :
405 1235803 : static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
406 : void *freelist_old, unsigned long counters_old,
407 : void *freelist_new, unsigned long counters_new,
408 : const char *n)
409 : {
410 : #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
411 : defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
412 1235803 : if (s->flags & __CMPXCHG_DOUBLE) {
413 1235803 : if (cmpxchg_double(&page->freelist, &page->counters,
414 : freelist_old, counters_old,
415 : freelist_new, counters_new))
416 : return true;
417 : } else
418 : #endif
419 : {
420 0 : unsigned long flags;
421 :
422 0 : local_irq_save(flags);
423 0 : slab_lock(page);
424 0 : if (page->freelist == freelist_old &&
425 0 : page->counters == counters_old) {
426 0 : page->freelist = freelist_new;
427 0 : page->counters = counters_new;
428 0 : slab_unlock(page);
429 0 : local_irq_restore(flags);
430 0 : return true;
431 : }
432 0 : slab_unlock(page);
433 0 : local_irq_restore(flags);
434 : }
435 :
436 0 : cpu_relax();
437 0 : stat(s, CMPXCHG_DOUBLE_FAIL);
438 :
439 : #ifdef SLUB_DEBUG_CMPXCHG
440 : pr_info("%s %s: cmpxchg double redo ", n, s->name);
441 : #endif
442 :
443 0 : return false;
444 : }
445 :
446 : #ifdef CONFIG_SLUB_DEBUG
447 : static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
448 : static DEFINE_SPINLOCK(object_map_lock);
449 :
450 : /*
451 : * Determine a map of object in use on a page.
452 : *
453 : * Node listlock must be held to guarantee that the page does
454 : * not vanish from under us.
455 : */
456 0 : static unsigned long *get_map(struct kmem_cache *s, struct page *page)
457 : __acquires(&object_map_lock)
458 : {
459 0 : void *p;
460 0 : void *addr = page_address(page);
461 :
462 0 : VM_BUG_ON(!irqs_disabled());
463 :
464 0 : spin_lock(&object_map_lock);
465 :
466 0 : bitmap_zero(object_map, page->objects);
467 :
468 0 : for (p = page->freelist; p; p = get_freepointer(s, p))
469 0 : set_bit(__obj_to_index(s, addr, p), object_map);
470 :
471 0 : return object_map;
472 : }
473 :
474 0 : static void put_map(unsigned long *map) __releases(&object_map_lock)
475 : {
476 0 : VM_BUG_ON(map != object_map);
477 0 : spin_unlock(&object_map_lock);
478 0 : }
479 :
480 0 : static inline unsigned int size_from_object(struct kmem_cache *s)
481 : {
482 0 : if (s->flags & SLAB_RED_ZONE)
483 0 : return s->size - s->red_left_pad;
484 :
485 0 : return s->size;
486 : }
487 :
488 0 : static inline void *restore_red_left(struct kmem_cache *s, void *p)
489 : {
490 0 : if (s->flags & SLAB_RED_ZONE)
491 0 : p -= s->red_left_pad;
492 :
493 0 : return p;
494 : }
495 :
496 : /*
497 : * Debug settings:
498 : */
499 : #if defined(CONFIG_SLUB_DEBUG_ON)
500 : static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
501 : #else
502 : static slab_flags_t slub_debug;
503 : #endif
504 :
505 : static char *slub_debug_string;
506 : static int disable_higher_order_debug;
507 :
508 : /*
509 : * slub is about to manipulate internal object metadata. This memory lies
510 : * outside the range of the allocated object, so accessing it would normally
511 : * be reported by kasan as a bounds error. metadata_access_enable() is used
512 : * to tell kasan that these accesses are OK.
513 : */
514 0 : static inline void metadata_access_enable(void)
515 : {
516 0 : kasan_disable_current();
517 : }
518 :
519 0 : static inline void metadata_access_disable(void)
520 : {
521 0 : kasan_enable_current();
522 0 : }
523 :
524 : /*
525 : * Object debugging
526 : */
527 :
528 : /* Verify that a pointer has an address that is valid within a slab page */
529 0 : static inline int check_valid_pointer(struct kmem_cache *s,
530 : struct page *page, void *object)
531 : {
532 0 : void *base;
533 :
534 0 : if (!object)
535 : return 1;
536 :
537 0 : base = page_address(page);
538 0 : object = kasan_reset_tag(object);
539 0 : object = restore_red_left(s, object);
540 0 : if (object < base || object >= base + page->objects * s->size ||
541 0 : (object - base) % s->size) {
542 0 : return 0;
543 : }
544 :
545 : return 1;
546 : }
547 :
548 0 : static void print_section(char *level, char *text, u8 *addr,
549 : unsigned int length)
550 : {
551 0 : metadata_access_enable();
552 0 : print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS,
553 : 16, 1, addr, length, 1);
554 0 : metadata_access_disable();
555 0 : }
556 :
557 : /*
558 : * See comment in calculate_sizes().
559 : */
560 0 : static inline bool freeptr_outside_object(struct kmem_cache *s)
561 : {
562 0 : return s->offset >= s->inuse;
563 : }
564 :
565 : /*
566 : * Return offset of the end of info block which is inuse + free pointer if
567 : * not overlapping with object.
568 : */
569 0 : static inline unsigned int get_info_end(struct kmem_cache *s)
570 : {
571 0 : if (freeptr_outside_object(s))
572 0 : return s->inuse + sizeof(void *);
573 : else
574 : return s->inuse;
575 : }
576 :
577 0 : static struct track *get_track(struct kmem_cache *s, void *object,
578 : enum track_item alloc)
579 : {
580 0 : struct track *p;
581 :
582 0 : p = object + get_info_end(s);
583 :
584 0 : return kasan_reset_tag(p + alloc);
585 : }
586 :
587 0 : static void set_track(struct kmem_cache *s, void *object,
588 : enum track_item alloc, unsigned long addr)
589 : {
590 0 : struct track *p = get_track(s, object, alloc);
591 :
592 0 : if (addr) {
593 : #ifdef CONFIG_STACKTRACE
594 0 : unsigned int nr_entries;
595 :
596 0 : metadata_access_enable();
597 0 : nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
598 : TRACK_ADDRS_COUNT, 3);
599 0 : metadata_access_disable();
600 :
601 0 : if (nr_entries < TRACK_ADDRS_COUNT)
602 0 : p->addrs[nr_entries] = 0;
603 : #endif
604 0 : p->addr = addr;
605 0 : p->cpu = smp_processor_id();
606 0 : p->pid = current->pid;
607 0 : p->when = jiffies;
608 : } else {
609 0 : memset(p, 0, sizeof(struct track));
610 : }
611 0 : }
612 :
613 1 : static void init_tracking(struct kmem_cache *s, void *object)
614 : {
615 1 : if (!(s->flags & SLAB_STORE_USER))
616 : return;
617 :
618 0 : set_track(s, object, TRACK_FREE, 0UL);
619 0 : set_track(s, object, TRACK_ALLOC, 0UL);
620 : }
621 :
622 0 : static void print_track(const char *s, struct track *t, unsigned long pr_time)
623 : {
624 0 : if (!t->addr)
625 : return;
626 :
627 0 : pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
628 : s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
629 : #ifdef CONFIG_STACKTRACE
630 : {
631 0 : int i;
632 0 : for (i = 0; i < TRACK_ADDRS_COUNT; i++)
633 0 : if (t->addrs[i])
634 0 : pr_err("\t%pS\n", (void *)t->addrs[i]);
635 : else
636 : break;
637 : }
638 : #endif
639 : }
640 :
641 0 : void print_tracking(struct kmem_cache *s, void *object)
642 : {
643 0 : unsigned long pr_time = jiffies;
644 0 : if (!(s->flags & SLAB_STORE_USER))
645 : return;
646 :
647 0 : print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
648 0 : print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
649 : }
650 :
651 0 : static void print_page_info(struct page *page)
652 : {
653 0 : pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
654 : page, page->objects, page->inuse, page->freelist, page->flags);
655 :
656 0 : }
657 :
658 0 : static void slab_bug(struct kmem_cache *s, char *fmt, ...)
659 : {
660 0 : struct va_format vaf;
661 0 : va_list args;
662 :
663 0 : va_start(args, fmt);
664 0 : vaf.fmt = fmt;
665 0 : vaf.va = &args;
666 0 : pr_err("=============================================================================\n");
667 0 : pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
668 0 : pr_err("-----------------------------------------------------------------------------\n\n");
669 :
670 0 : add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
671 0 : va_end(args);
672 0 : }
673 :
674 0 : static void slab_fix(struct kmem_cache *s, char *fmt, ...)
675 : {
676 0 : struct va_format vaf;
677 0 : va_list args;
678 :
679 0 : va_start(args, fmt);
680 0 : vaf.fmt = fmt;
681 0 : vaf.va = &args;
682 0 : pr_err("FIX %s: %pV\n", s->name, &vaf);
683 0 : va_end(args);
684 0 : }
685 :
686 43 : static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
687 : void **freelist, void *nextfree)
688 : {
689 43 : if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
690 0 : !check_valid_pointer(s, page, nextfree) && freelist) {
691 0 : object_err(s, page, *freelist, "Freechain corrupt");
692 0 : *freelist = NULL;
693 0 : slab_fix(s, "Isolate corrupted freechain");
694 0 : return true;
695 : }
696 :
697 : return false;
698 : }
699 :
700 0 : static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
701 : {
702 0 : unsigned int off; /* Offset of last byte */
703 0 : u8 *addr = page_address(page);
704 :
705 0 : print_tracking(s, p);
706 :
707 0 : print_page_info(page);
708 :
709 0 : pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
710 : p, p - addr, get_freepointer(s, p));
711 :
712 0 : if (s->flags & SLAB_RED_ZONE)
713 0 : print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
714 : s->red_left_pad);
715 0 : else if (p > addr + 16)
716 0 : print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
717 :
718 0 : print_section(KERN_ERR, "Object ", p,
719 0 : min_t(unsigned int, s->object_size, PAGE_SIZE));
720 0 : if (s->flags & SLAB_RED_ZONE)
721 0 : print_section(KERN_ERR, "Redzone ", p + s->object_size,
722 0 : s->inuse - s->object_size);
723 :
724 0 : off = get_info_end(s);
725 :
726 0 : if (s->flags & SLAB_STORE_USER)
727 0 : off += 2 * sizeof(struct track);
728 :
729 0 : off += kasan_metadata_size(s);
730 :
731 0 : if (off != size_from_object(s))
732 : /* Beginning of the filler is the free pointer */
733 0 : print_section(KERN_ERR, "Padding ", p + off,
734 0 : size_from_object(s) - off);
735 :
736 0 : dump_stack();
737 0 : }
738 :
739 0 : void object_err(struct kmem_cache *s, struct page *page,
740 : u8 *object, char *reason)
741 : {
742 0 : slab_bug(s, "%s", reason);
743 0 : print_trailer(s, page, object);
744 0 : }
745 :
746 0 : static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
747 : const char *fmt, ...)
748 : {
749 0 : va_list args;
750 0 : char buf[100];
751 :
752 0 : va_start(args, fmt);
753 0 : vsnprintf(buf, sizeof(buf), fmt, args);
754 0 : va_end(args);
755 0 : slab_bug(s, "%s", buf);
756 0 : print_page_info(page);
757 0 : dump_stack();
758 0 : }
759 :
760 1 : static void init_object(struct kmem_cache *s, void *object, u8 val)
761 : {
762 1 : u8 *p = kasan_reset_tag(object);
763 :
764 1 : if (s->flags & SLAB_RED_ZONE)
765 0 : memset(p - s->red_left_pad, val, s->red_left_pad);
766 :
767 1 : if (s->flags & __OBJECT_POISON) {
768 0 : memset(p, POISON_FREE, s->object_size - 1);
769 0 : p[s->object_size - 1] = POISON_END;
770 : }
771 :
772 1 : if (s->flags & SLAB_RED_ZONE)
773 0 : memset(p + s->object_size, val, s->inuse - s->object_size);
774 1 : }
775 :
776 0 : static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
777 : void *from, void *to)
778 : {
779 0 : slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
780 0 : memset(from, data, to - from);
781 0 : }
782 :
783 0 : static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
784 : u8 *object, char *what,
785 : u8 *start, unsigned int value, unsigned int bytes)
786 : {
787 0 : u8 *fault;
788 0 : u8 *end;
789 0 : u8 *addr = page_address(page);
790 :
791 0 : metadata_access_enable();
792 0 : fault = memchr_inv(kasan_reset_tag(start), value, bytes);
793 0 : metadata_access_disable();
794 0 : if (!fault)
795 : return 1;
796 :
797 0 : end = start + bytes;
798 0 : while (end > fault && end[-1] == value)
799 0 : end--;
800 :
801 0 : slab_bug(s, "%s overwritten", what);
802 0 : pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
803 : fault, end - 1, fault - addr,
804 : fault[0], value);
805 0 : print_trailer(s, page, object);
806 :
807 0 : restore_bytes(s, what, value, fault, end);
808 0 : return 0;
809 : }
810 :
811 : /*
812 : * Object layout:
813 : *
814 : * object address
815 : * Bytes of the object to be managed.
816 : * If the freepointer may overlay the object then the free
817 : * pointer is at the middle of the object.
818 : *
819 : * Poisoning uses 0x6b (POISON_FREE) and the last byte is
820 : * 0xa5 (POISON_END)
821 : *
822 : * object + s->object_size
823 : * Padding to reach word boundary. This is also used for Redzoning.
824 : * Padding is extended by another word if Redzoning is enabled and
825 : * object_size == inuse.
826 : *
827 : * We fill with 0xbb (RED_INACTIVE) for inactive objects and with
828 : * 0xcc (RED_ACTIVE) for objects in use.
829 : *
830 : * object + s->inuse
831 : * Meta data starts here.
832 : *
833 : * A. Free pointer (if we cannot overwrite object on free)
834 : * B. Tracking data for SLAB_STORE_USER
835 : * C. Padding to reach required alignment boundary or at mininum
836 : * one word if debugging is on to be able to detect writes
837 : * before the word boundary.
838 : *
839 : * Padding is done using 0x5a (POISON_INUSE)
840 : *
841 : * object + s->size
842 : * Nothing is used beyond s->size.
843 : *
844 : * If slabcaches are merged then the object_size and inuse boundaries are mostly
845 : * ignored. And therefore no slab options that rely on these boundaries
846 : * may be used with merged slabcaches.
847 : */
848 :
849 0 : static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
850 : {
851 0 : unsigned long off = get_info_end(s); /* The end of info */
852 :
853 0 : if (s->flags & SLAB_STORE_USER)
854 : /* We also have user information there */
855 0 : off += 2 * sizeof(struct track);
856 :
857 0 : off += kasan_metadata_size(s);
858 :
859 0 : if (size_from_object(s) == off)
860 : return 1;
861 :
862 0 : return check_bytes_and_report(s, page, p, "Object padding",
863 0 : p + off, POISON_INUSE, size_from_object(s) - off);
864 : }
865 :
866 : /* Check the pad bytes at the end of a slab page */
867 0 : static int slab_pad_check(struct kmem_cache *s, struct page *page)
868 : {
869 0 : u8 *start;
870 0 : u8 *fault;
871 0 : u8 *end;
872 0 : u8 *pad;
873 0 : int length;
874 0 : int remainder;
875 :
876 0 : if (!(s->flags & SLAB_POISON))
877 : return 1;
878 :
879 0 : start = page_address(page);
880 0 : length = page_size(page);
881 0 : end = start + length;
882 0 : remainder = length % s->size;
883 0 : if (!remainder)
884 : return 1;
885 :
886 0 : pad = end - remainder;
887 0 : metadata_access_enable();
888 0 : fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
889 0 : metadata_access_disable();
890 0 : if (!fault)
891 : return 1;
892 0 : while (end > fault && end[-1] == POISON_INUSE)
893 0 : end--;
894 :
895 0 : slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
896 : fault, end - 1, fault - start);
897 0 : print_section(KERN_ERR, "Padding ", pad, remainder);
898 :
899 0 : restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
900 0 : return 0;
901 : }
902 :
903 0 : static int check_object(struct kmem_cache *s, struct page *page,
904 : void *object, u8 val)
905 : {
906 0 : u8 *p = object;
907 0 : u8 *endobject = object + s->object_size;
908 :
909 0 : if (s->flags & SLAB_RED_ZONE) {
910 0 : if (!check_bytes_and_report(s, page, object, "Redzone",
911 0 : object - s->red_left_pad, val, s->red_left_pad))
912 : return 0;
913 :
914 0 : if (!check_bytes_and_report(s, page, object, "Redzone",
915 0 : endobject, val, s->inuse - s->object_size))
916 : return 0;
917 : } else {
918 0 : if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
919 0 : check_bytes_and_report(s, page, p, "Alignment padding",
920 : endobject, POISON_INUSE,
921 : s->inuse - s->object_size);
922 : }
923 : }
924 :
925 0 : if (s->flags & SLAB_POISON) {
926 0 : if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
927 0 : (!check_bytes_and_report(s, page, p, "Poison", p,
928 0 : POISON_FREE, s->object_size - 1) ||
929 0 : !check_bytes_and_report(s, page, p, "Poison",
930 0 : p + s->object_size - 1, POISON_END, 1)))
931 0 : return 0;
932 : /*
933 : * check_pad_bytes cleans up on its own.
934 : */
935 0 : check_pad_bytes(s, page, p);
936 : }
937 :
938 0 : if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
939 : /*
940 : * Object and freepointer overlap. Cannot check
941 : * freepointer while object is allocated.
942 : */
943 : return 1;
944 :
945 : /* Check free pointer validity */
946 0 : if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
947 0 : object_err(s, page, p, "Freepointer corrupt");
948 : /*
949 : * No choice but to zap it and thus lose the remainder
950 : * of the free objects in this slab. May cause
951 : * another error because the object count is now wrong.
952 : */
953 0 : set_freepointer(s, p, NULL);
954 0 : return 0;
955 : }
956 : return 1;
957 : }
958 :
959 0 : static int check_slab(struct kmem_cache *s, struct page *page)
960 : {
961 0 : int maxobj;
962 :
963 0 : VM_BUG_ON(!irqs_disabled());
964 :
965 0 : if (!PageSlab(page)) {
966 0 : slab_err(s, page, "Not a valid slab page");
967 0 : return 0;
968 : }
969 :
970 0 : maxobj = order_objects(compound_order(page), s->size);
971 0 : if (page->objects > maxobj) {
972 0 : slab_err(s, page, "objects %u > max %u",
973 : page->objects, maxobj);
974 0 : return 0;
975 : }
976 0 : if (page->inuse > page->objects) {
977 0 : slab_err(s, page, "inuse %u > max %u",
978 : page->inuse, page->objects);
979 0 : return 0;
980 : }
981 : /* Slab_pad_check fixes things up after itself */
982 0 : slab_pad_check(s, page);
983 0 : return 1;
984 : }
985 :
986 : /*
987 : * Determine if a certain object on a page is on the freelist. Must hold the
988 : * slab lock to guarantee that the chains are in a consistent state.
989 : */
990 0 : static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
991 : {
992 0 : int nr = 0;
993 0 : void *fp;
994 0 : void *object = NULL;
995 0 : int max_objects;
996 :
997 0 : fp = page->freelist;
998 0 : while (fp && nr <= page->objects) {
999 0 : if (fp == search)
1000 : return 1;
1001 0 : if (!check_valid_pointer(s, page, fp)) {
1002 0 : if (object) {
1003 0 : object_err(s, page, object,
1004 : "Freechain corrupt");
1005 0 : set_freepointer(s, object, NULL);
1006 : } else {
1007 0 : slab_err(s, page, "Freepointer corrupt");
1008 0 : page->freelist = NULL;
1009 0 : page->inuse = page->objects;
1010 0 : slab_fix(s, "Freelist cleared");
1011 0 : return 0;
1012 : }
1013 : break;
1014 : }
1015 0 : object = fp;
1016 0 : fp = get_freepointer(s, object);
1017 0 : nr++;
1018 : }
1019 :
1020 0 : max_objects = order_objects(compound_order(page), s->size);
1021 0 : if (max_objects > MAX_OBJS_PER_PAGE)
1022 : max_objects = MAX_OBJS_PER_PAGE;
1023 :
1024 0 : if (page->objects != max_objects) {
1025 0 : slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1026 : page->objects, max_objects);
1027 0 : page->objects = max_objects;
1028 0 : slab_fix(s, "Number of objects adjusted.");
1029 : }
1030 0 : if (page->inuse != page->objects - nr) {
1031 0 : slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1032 : page->inuse, page->objects - nr);
1033 0 : page->inuse = page->objects - nr;
1034 0 : slab_fix(s, "Object count adjusted.");
1035 : }
1036 0 : return search == NULL;
1037 : }
1038 :
1039 0 : static void trace(struct kmem_cache *s, struct page *page, void *object,
1040 : int alloc)
1041 : {
1042 0 : if (s->flags & SLAB_TRACE) {
1043 0 : pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1044 : s->name,
1045 : alloc ? "alloc" : "free",
1046 : object, page->inuse,
1047 : page->freelist);
1048 :
1049 0 : if (!alloc)
1050 0 : print_section(KERN_INFO, "Object ", (void *)object,
1051 : s->object_size);
1052 :
1053 0 : dump_stack();
1054 : }
1055 0 : }
1056 :
1057 : /*
1058 : * Tracking of fully allocated slabs for debugging purposes.
1059 : */
1060 0 : static void add_full(struct kmem_cache *s,
1061 : struct kmem_cache_node *n, struct page *page)
1062 : {
1063 0 : if (!(s->flags & SLAB_STORE_USER))
1064 : return;
1065 :
1066 0 : lockdep_assert_held(&n->list_lock);
1067 0 : list_add(&page->slab_list, &n->full);
1068 : }
1069 :
1070 213438 : static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1071 : {
1072 213438 : if (!(s->flags & SLAB_STORE_USER))
1073 : return;
1074 :
1075 0 : lockdep_assert_held(&n->list_lock);
1076 0 : list_del(&page->slab_list);
1077 : }
1078 :
1079 : /* Tracking of the number of slabs for debugging purposes */
1080 0 : static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1081 : {
1082 0 : struct kmem_cache_node *n = get_node(s, node);
1083 :
1084 0 : return atomic_long_read(&n->nr_slabs);
1085 : }
1086 :
1087 0 : static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1088 : {
1089 0 : return atomic_long_read(&n->nr_slabs);
1090 : }
1091 :
1092 28151 : static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1093 : {
1094 28151 : struct kmem_cache_node *n = get_node(s, node);
1095 :
1096 : /*
1097 : * May be called early in order to allocate a slab for the
1098 : * kmem_cache_node structure. Solve the chicken-egg
1099 : * dilemma by deferring the increment of the count during
1100 : * bootstrap (see early_kmem_cache_node_alloc).
1101 : */
1102 28151 : if (likely(n)) {
1103 28150 : atomic_long_inc(&n->nr_slabs);
1104 28150 : atomic_long_add(objects, &n->total_objects);
1105 : }
1106 28151 : }
1107 17351 : static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1108 : {
1109 17351 : struct kmem_cache_node *n = get_node(s, node);
1110 :
1111 17351 : atomic_long_dec(&n->nr_slabs);
1112 17351 : atomic_long_sub(objects, &n->total_objects);
1113 17351 : }
1114 :
1115 : /* Object debug checks for alloc/free paths */
1116 487601 : static void setup_object_debug(struct kmem_cache *s, struct page *page,
1117 : void *object)
1118 : {
1119 487601 : if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1120 : return;
1121 :
1122 0 : init_object(s, object, SLUB_RED_INACTIVE);
1123 0 : init_tracking(s, object);
1124 : }
1125 :
1126 : static
1127 28149 : void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1128 : {
1129 28149 : if (!kmem_cache_debug_flags(s, SLAB_POISON))
1130 : return;
1131 :
1132 0 : metadata_access_enable();
1133 0 : memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
1134 0 : metadata_access_disable();
1135 : }
1136 :
1137 0 : static inline int alloc_consistency_checks(struct kmem_cache *s,
1138 : struct page *page, void *object)
1139 : {
1140 0 : if (!check_slab(s, page))
1141 : return 0;
1142 :
1143 0 : if (!check_valid_pointer(s, page, object)) {
1144 0 : object_err(s, page, object, "Freelist Pointer check fails");
1145 0 : return 0;
1146 : }
1147 :
1148 0 : if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1149 0 : return 0;
1150 :
1151 : return 1;
1152 : }
1153 :
1154 0 : static noinline int alloc_debug_processing(struct kmem_cache *s,
1155 : struct page *page,
1156 : void *object, unsigned long addr)
1157 : {
1158 0 : if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1159 0 : if (!alloc_consistency_checks(s, page, object))
1160 0 : goto bad;
1161 : }
1162 :
1163 : /* Success perform special debug activities for allocs */
1164 0 : if (s->flags & SLAB_STORE_USER)
1165 0 : set_track(s, object, TRACK_ALLOC, addr);
1166 0 : trace(s, page, object, 1);
1167 0 : init_object(s, object, SLUB_RED_ACTIVE);
1168 0 : return 1;
1169 :
1170 0 : bad:
1171 0 : if (PageSlab(page)) {
1172 : /*
1173 : * If this is a slab page then lets do the best we can
1174 : * to avoid issues in the future. Marking all objects
1175 : * as used avoids touching the remaining objects.
1176 : */
1177 0 : slab_fix(s, "Marking all objects used");
1178 0 : page->inuse = page->objects;
1179 0 : page->freelist = NULL;
1180 : }
1181 : return 0;
1182 : }
1183 :
1184 0 : static inline int free_consistency_checks(struct kmem_cache *s,
1185 : struct page *page, void *object, unsigned long addr)
1186 : {
1187 0 : if (!check_valid_pointer(s, page, object)) {
1188 0 : slab_err(s, page, "Invalid object pointer 0x%p", object);
1189 0 : return 0;
1190 : }
1191 :
1192 0 : if (on_freelist(s, page, object)) {
1193 0 : object_err(s, page, object, "Object already free");
1194 0 : return 0;
1195 : }
1196 :
1197 0 : if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1198 : return 0;
1199 :
1200 0 : if (unlikely(s != page->slab_cache)) {
1201 0 : if (!PageSlab(page)) {
1202 0 : slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1203 : object);
1204 0 : } else if (!page->slab_cache) {
1205 0 : pr_err("SLUB <none>: no slab for object 0x%p.\n",
1206 : object);
1207 0 : dump_stack();
1208 : } else
1209 0 : object_err(s, page, object,
1210 : "page slab pointer corrupt.");
1211 0 : return 0;
1212 : }
1213 : return 1;
1214 : }
1215 :
1216 : /* Supports checking bulk free of a constructed freelist */
1217 0 : static noinline int free_debug_processing(
1218 : struct kmem_cache *s, struct page *page,
1219 : void *head, void *tail, int bulk_cnt,
1220 : unsigned long addr)
1221 : {
1222 0 : struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1223 0 : void *object = head;
1224 0 : int cnt = 0;
1225 0 : unsigned long flags;
1226 0 : int ret = 0;
1227 :
1228 0 : spin_lock_irqsave(&n->list_lock, flags);
1229 0 : slab_lock(page);
1230 :
1231 0 : if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1232 0 : if (!check_slab(s, page))
1233 0 : goto out;
1234 : }
1235 :
1236 0 : next_object:
1237 0 : cnt++;
1238 :
1239 0 : if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1240 0 : if (!free_consistency_checks(s, page, object, addr))
1241 0 : goto out;
1242 : }
1243 :
1244 0 : if (s->flags & SLAB_STORE_USER)
1245 0 : set_track(s, object, TRACK_FREE, addr);
1246 0 : trace(s, page, object, 0);
1247 : /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
1248 0 : init_object(s, object, SLUB_RED_INACTIVE);
1249 :
1250 : /* Reached end of constructed freelist yet? */
1251 0 : if (object != tail) {
1252 0 : object = get_freepointer(s, object);
1253 0 : goto next_object;
1254 : }
1255 : ret = 1;
1256 :
1257 0 : out:
1258 0 : if (cnt != bulk_cnt)
1259 0 : slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1260 : bulk_cnt, cnt);
1261 :
1262 0 : slab_unlock(page);
1263 0 : spin_unlock_irqrestore(&n->list_lock, flags);
1264 0 : if (!ret)
1265 0 : slab_fix(s, "Object at 0x%p not freed", object);
1266 0 : return ret;
1267 : }
1268 :
1269 : /*
1270 : * Parse a block of slub_debug options. Blocks are delimited by ';'
1271 : *
1272 : * @str: start of block
1273 : * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1274 : * @slabs: return start of list of slabs, or NULL when there's no list
1275 : * @init: assume this is initial parsing and not per-kmem-create parsing
1276 : *
1277 : * returns the start of next block if there's any, or NULL
1278 : */
1279 : static char *
1280 0 : parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1281 : {
1282 0 : bool higher_order_disable = false;
1283 :
1284 : /* Skip any completely empty blocks */
1285 0 : while (*str && *str == ';')
1286 0 : str++;
1287 :
1288 0 : if (*str == ',') {
1289 : /*
1290 : * No options but restriction on slabs. This means full
1291 : * debugging for slabs matching a pattern.
1292 : */
1293 0 : *flags = DEBUG_DEFAULT_FLAGS;
1294 0 : goto check_slabs;
1295 : }
1296 0 : *flags = 0;
1297 :
1298 : /* Determine which debug features should be switched on */
1299 0 : for (; *str && *str != ',' && *str != ';'; str++) {
1300 0 : switch (tolower(*str)) {
1301 0 : case '-':
1302 0 : *flags = 0;
1303 0 : break;
1304 0 : case 'f':
1305 0 : *flags |= SLAB_CONSISTENCY_CHECKS;
1306 0 : break;
1307 0 : case 'z':
1308 0 : *flags |= SLAB_RED_ZONE;
1309 0 : break;
1310 0 : case 'p':
1311 0 : *flags |= SLAB_POISON;
1312 0 : break;
1313 0 : case 'u':
1314 0 : *flags |= SLAB_STORE_USER;
1315 0 : break;
1316 0 : case 't':
1317 0 : *flags |= SLAB_TRACE;
1318 0 : break;
1319 0 : case 'a':
1320 0 : *flags |= SLAB_FAILSLAB;
1321 0 : break;
1322 : case 'o':
1323 : /*
1324 : * Avoid enabling debugging on caches if its minimum
1325 : * order would increase as a result.
1326 : */
1327 : higher_order_disable = true;
1328 : break;
1329 0 : default:
1330 0 : if (init)
1331 0 : pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1332 : }
1333 : }
1334 0 : check_slabs:
1335 0 : if (*str == ',')
1336 0 : *slabs = ++str;
1337 : else
1338 0 : *slabs = NULL;
1339 :
1340 : /* Skip over the slab list */
1341 0 : while (*str && *str != ';')
1342 0 : str++;
1343 :
1344 : /* Skip any completely empty blocks */
1345 0 : while (*str && *str == ';')
1346 0 : str++;
1347 :
1348 0 : if (init && higher_order_disable)
1349 0 : disable_higher_order_debug = 1;
1350 :
1351 0 : if (*str)
1352 : return str;
1353 : else
1354 0 : return NULL;
1355 : }
1356 :
1357 0 : static int __init setup_slub_debug(char *str)
1358 : {
1359 0 : slab_flags_t flags;
1360 0 : char *saved_str;
1361 0 : char *slab_list;
1362 0 : bool global_slub_debug_changed = false;
1363 0 : bool slab_list_specified = false;
1364 :
1365 0 : slub_debug = DEBUG_DEFAULT_FLAGS;
1366 0 : if (*str++ != '=' || !*str)
1367 : /*
1368 : * No options specified. Switch on full debugging.
1369 : */
1370 0 : goto out;
1371 :
1372 0 : saved_str = str;
1373 0 : while (str) {
1374 0 : str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1375 :
1376 0 : if (!slab_list) {
1377 0 : slub_debug = flags;
1378 0 : global_slub_debug_changed = true;
1379 : } else {
1380 : slab_list_specified = true;
1381 : }
1382 : }
1383 :
1384 : /*
1385 : * For backwards compatibility, a single list of flags with list of
1386 : * slabs means debugging is only enabled for those slabs, so the global
1387 : * slub_debug should be 0. We can extended that to multiple lists as
1388 : * long as there is no option specifying flags without a slab list.
1389 : */
1390 0 : if (slab_list_specified) {
1391 0 : if (!global_slub_debug_changed)
1392 0 : slub_debug = 0;
1393 0 : slub_debug_string = saved_str;
1394 : }
1395 0 : out:
1396 0 : if (slub_debug != 0 || slub_debug_string)
1397 0 : static_branch_enable(&slub_debug_enabled);
1398 0 : if ((static_branch_unlikely(&init_on_alloc) ||
1399 0 : static_branch_unlikely(&init_on_free)) &&
1400 0 : (slub_debug & SLAB_POISON))
1401 0 : pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1402 0 : return 1;
1403 : }
1404 :
1405 : __setup("slub_debug", setup_slub_debug);
1406 :
1407 : /*
1408 : * kmem_cache_flags - apply debugging options to the cache
1409 : * @object_size: the size of an object without meta data
1410 : * @flags: flags to set
1411 : * @name: name of the cache
1412 : *
1413 : * Debug option(s) are applied to @flags. In addition to the debug
1414 : * option(s), if a slab name (or multiple) is specified i.e.
1415 : * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1416 : * then only the select slabs will receive the debug option(s).
1417 : */
1418 145 : slab_flags_t kmem_cache_flags(unsigned int object_size,
1419 : slab_flags_t flags, const char *name)
1420 : {
1421 145 : char *iter;
1422 145 : size_t len;
1423 145 : char *next_block;
1424 145 : slab_flags_t block_flags;
1425 145 : slab_flags_t slub_debug_local = slub_debug;
1426 :
1427 : /*
1428 : * If the slab cache is for debugging (e.g. kmemleak) then
1429 : * don't store user (stack trace) information by default,
1430 : * but let the user enable it via the command line below.
1431 : */
1432 145 : if (flags & SLAB_NOLEAKTRACE)
1433 3 : slub_debug_local &= ~SLAB_STORE_USER;
1434 :
1435 145 : len = strlen(name);
1436 145 : next_block = slub_debug_string;
1437 : /* Go through all blocks of debug options, see if any matches our slab's name */
1438 145 : while (next_block) {
1439 0 : next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1440 0 : if (!iter)
1441 0 : continue;
1442 : /* Found a block that has a slab list, search it */
1443 0 : while (*iter) {
1444 0 : char *end, *glob;
1445 0 : size_t cmplen;
1446 :
1447 0 : end = strchrnul(iter, ',');
1448 0 : if (next_block && next_block < end)
1449 0 : end = next_block - 1;
1450 :
1451 0 : glob = strnchr(iter, end - iter, '*');
1452 0 : if (glob)
1453 0 : cmplen = glob - iter;
1454 : else
1455 0 : cmplen = max_t(size_t, len, (end - iter));
1456 :
1457 0 : if (!strncmp(name, iter, cmplen)) {
1458 0 : flags |= block_flags;
1459 0 : return flags;
1460 : }
1461 :
1462 0 : if (!*end || *end == ';')
1463 : break;
1464 0 : iter = end + 1;
1465 : }
1466 : }
1467 :
1468 145 : return flags | slub_debug_local;
1469 : }
1470 : #else /* !CONFIG_SLUB_DEBUG */
1471 : static inline void setup_object_debug(struct kmem_cache *s,
1472 : struct page *page, void *object) {}
1473 : static inline
1474 : void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1475 :
1476 : static inline int alloc_debug_processing(struct kmem_cache *s,
1477 : struct page *page, void *object, unsigned long addr) { return 0; }
1478 :
1479 : static inline int free_debug_processing(
1480 : struct kmem_cache *s, struct page *page,
1481 : void *head, void *tail, int bulk_cnt,
1482 : unsigned long addr) { return 0; }
1483 :
1484 : static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1485 : { return 1; }
1486 : static inline int check_object(struct kmem_cache *s, struct page *page,
1487 : void *object, u8 val) { return 1; }
1488 : static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1489 : struct page *page) {}
1490 : static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1491 : struct page *page) {}
1492 : slab_flags_t kmem_cache_flags(unsigned int object_size,
1493 : slab_flags_t flags, const char *name)
1494 : {
1495 : return flags;
1496 : }
1497 : #define slub_debug 0
1498 :
1499 : #define disable_higher_order_debug 0
1500 :
1501 : static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1502 : { return 0; }
1503 : static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1504 : { return 0; }
1505 : static inline void inc_slabs_node(struct kmem_cache *s, int node,
1506 : int objects) {}
1507 : static inline void dec_slabs_node(struct kmem_cache *s, int node,
1508 : int objects) {}
1509 :
1510 : static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
1511 : void **freelist, void *nextfree)
1512 : {
1513 : return false;
1514 : }
1515 : #endif /* CONFIG_SLUB_DEBUG */
1516 :
1517 : /*
1518 : * Hooks for other subsystems that check memory allocations. In a typical
1519 : * production configuration these hooks all should produce no code at all.
1520 : */
1521 2 : static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1522 : {
1523 2 : ptr = kasan_kmalloc_large(ptr, size, flags);
1524 : /* As ptr might get tagged, call kmemleak hook after KASAN. */
1525 2 : kmemleak_alloc(ptr, size, 1, flags);
1526 2 : return ptr;
1527 : }
1528 :
1529 16 : static __always_inline void kfree_hook(void *x)
1530 : {
1531 16 : kmemleak_free(x);
1532 16 : kasan_kfree_large(x);
1533 : }
1534 :
1535 1303396 : static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1536 : {
1537 2606584 : kmemleak_free_recursive(x, s->flags);
1538 :
1539 : /*
1540 : * Trouble is that we may no longer disable interrupts in the fast path
1541 : * So in order to make the debug calls that expect irqs to be
1542 : * disabled we need to disable interrupts temporarily.
1543 : */
1544 : #ifdef CONFIG_LOCKDEP
1545 : {
1546 1303188 : unsigned long flags;
1547 :
1548 2606917 : local_irq_save(flags);
1549 1303177 : debug_check_no_locks_freed(x, s->object_size);
1550 1303639 : local_irq_restore(flags);
1551 : }
1552 : #endif
1553 1304353 : if (!(s->flags & SLAB_DEBUG_OBJECTS))
1554 1255083 : debug_check_no_obj_freed(x, s->object_size);
1555 :
1556 : /* Use KCSAN to help debug racy use-after-free. */
1557 1304510 : if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1558 : __kcsan_check_access(x, s->object_size,
1559 : KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1560 :
1561 : /* KASAN might put x into memory quarantine, delaying its reuse */
1562 1304510 : return kasan_slab_free(s, x);
1563 : }
1564 :
1565 1303450 : static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1566 : void **head, void **tail)
1567 : {
1568 :
1569 1303450 : void *object;
1570 1303450 : void *next = *head;
1571 1303450 : void *old_tail = *tail ? *tail : *head;
1572 1303450 : int rsize;
1573 :
1574 1303450 : if (is_kfence_address(next)) {
1575 : slab_free_hook(s, next);
1576 : return true;
1577 : }
1578 :
1579 : /* Head and tail of the reconstructed freelist */
1580 1303450 : *head = NULL;
1581 1303450 : *tail = NULL;
1582 :
1583 1303396 : do {
1584 1303396 : object = next;
1585 1303396 : next = get_freepointer(s, object);
1586 :
1587 1303396 : if (slab_want_init_on_free(s)) {
1588 : /*
1589 : * Clear the object and the metadata, but don't touch
1590 : * the redzone.
1591 : */
1592 0 : memset(kasan_reset_tag(object), 0, s->object_size);
1593 0 : rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1594 0 : : 0;
1595 0 : memset((char *)kasan_reset_tag(object) + s->inuse, 0,
1596 : s->size - s->inuse - rsize);
1597 :
1598 : }
1599 : /* If object's reuse doesn't have to be delayed */
1600 2606596 : if (!slab_free_hook(s, object)) {
1601 : /* Move object to the new freelist */
1602 42091 : set_freepointer(s, object, *head);
1603 42091 : *head = object;
1604 42091 : if (!*tail)
1605 42091 : *tail = object;
1606 : }
1607 1303200 : } while (object != old_tail);
1608 :
1609 1303254 : if (*head == *tail)
1610 1303690 : *tail = NULL;
1611 :
1612 1303254 : return *head != NULL;
1613 : }
1614 :
1615 487513 : static void *setup_object(struct kmem_cache *s, struct page *page,
1616 : void *object)
1617 : {
1618 487513 : setup_object_debug(s, page, object);
1619 487520 : object = kasan_init_slab_obj(s, object);
1620 487525 : if (unlikely(s->ctor)) {
1621 20623 : kasan_unpoison_object_data(s, object);
1622 20623 : s->ctor(object);
1623 20622 : kasan_poison_object_data(s, object);
1624 : }
1625 487525 : return object;
1626 : }
1627 :
1628 : /*
1629 : * Slab allocation and freeing
1630 : */
1631 28142 : static inline struct page *alloc_slab_page(struct kmem_cache *s,
1632 : gfp_t flags, int node, struct kmem_cache_order_objects oo)
1633 : {
1634 28142 : struct page *page;
1635 28142 : unsigned int order = oo_order(oo);
1636 :
1637 28142 : if (node == NUMA_NO_NODE)
1638 28106 : page = alloc_pages(flags, order);
1639 : else
1640 36 : page = __alloc_pages_node(node, flags, order);
1641 :
1642 28145 : return page;
1643 : }
1644 :
1645 : #ifdef CONFIG_SLAB_FREELIST_RANDOM
1646 : /* Pre-initialize the random sequence cache */
1647 : static int init_cache_random_seq(struct kmem_cache *s)
1648 : {
1649 : unsigned int count = oo_objects(s->oo);
1650 : int err;
1651 :
1652 : /* Bailout if already initialised */
1653 : if (s->random_seq)
1654 : return 0;
1655 :
1656 : err = cache_random_seq_create(s, count, GFP_KERNEL);
1657 : if (err) {
1658 : pr_err("SLUB: Unable to initialize free list for %s\n",
1659 : s->name);
1660 : return err;
1661 : }
1662 :
1663 : /* Transform to an offset on the set of pages */
1664 : if (s->random_seq) {
1665 : unsigned int i;
1666 :
1667 : for (i = 0; i < count; i++)
1668 : s->random_seq[i] *= s->size;
1669 : }
1670 : return 0;
1671 : }
1672 :
1673 : /* Initialize each random sequence freelist per cache */
1674 : static void __init init_freelist_randomization(void)
1675 : {
1676 : struct kmem_cache *s;
1677 :
1678 : mutex_lock(&slab_mutex);
1679 :
1680 : list_for_each_entry(s, &slab_caches, list)
1681 : init_cache_random_seq(s);
1682 :
1683 : mutex_unlock(&slab_mutex);
1684 : }
1685 :
1686 : /* Get the next entry on the pre-computed freelist randomized */
1687 : static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1688 : unsigned long *pos, void *start,
1689 : unsigned long page_limit,
1690 : unsigned long freelist_count)
1691 : {
1692 : unsigned int idx;
1693 :
1694 : /*
1695 : * If the target page allocation failed, the number of objects on the
1696 : * page might be smaller than the usual size defined by the cache.
1697 : */
1698 : do {
1699 : idx = s->random_seq[*pos];
1700 : *pos += 1;
1701 : if (*pos >= freelist_count)
1702 : *pos = 0;
1703 : } while (unlikely(idx >= page_limit));
1704 :
1705 : return (char *)start + idx;
1706 : }
1707 :
1708 : /* Shuffle the single linked freelist based on a random pre-computed sequence */
1709 : static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1710 : {
1711 : void *start;
1712 : void *cur;
1713 : void *next;
1714 : unsigned long idx, pos, page_limit, freelist_count;
1715 :
1716 : if (page->objects < 2 || !s->random_seq)
1717 : return false;
1718 :
1719 : freelist_count = oo_objects(s->oo);
1720 : pos = get_random_int() % freelist_count;
1721 :
1722 : page_limit = page->objects * s->size;
1723 : start = fixup_red_left(s, page_address(page));
1724 :
1725 : /* First entry is used as the base of the freelist */
1726 : cur = next_freelist_entry(s, page, &pos, start, page_limit,
1727 : freelist_count);
1728 : cur = setup_object(s, page, cur);
1729 : page->freelist = cur;
1730 :
1731 : for (idx = 1; idx < page->objects; idx++) {
1732 : next = next_freelist_entry(s, page, &pos, start, page_limit,
1733 : freelist_count);
1734 : next = setup_object(s, page, next);
1735 : set_freepointer(s, cur, next);
1736 : cur = next;
1737 : }
1738 : set_freepointer(s, cur, NULL);
1739 :
1740 : return true;
1741 : }
1742 : #else
1743 : static inline int init_cache_random_seq(struct kmem_cache *s)
1744 : {
1745 : return 0;
1746 : }
1747 1 : static inline void init_freelist_randomization(void) { }
1748 28144 : static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1749 : {
1750 28144 : return false;
1751 : }
1752 : #endif /* CONFIG_SLAB_FREELIST_RANDOM */
1753 :
1754 28138 : static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1755 : {
1756 28138 : struct page *page;
1757 28138 : struct kmem_cache_order_objects oo = s->oo;
1758 28138 : gfp_t alloc_gfp;
1759 28138 : void *start, *p, *next;
1760 28138 : int idx;
1761 28138 : bool shuffle;
1762 :
1763 28138 : flags &= gfp_allowed_mask;
1764 :
1765 28138 : if (gfpflags_allow_blocking(flags))
1766 25755 : local_irq_enable();
1767 :
1768 28141 : flags |= s->allocflags;
1769 :
1770 : /*
1771 : * Let the initial higher-order allocation fail under memory pressure
1772 : * so we fall-back to the minimum order allocation.
1773 : */
1774 28141 : alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1775 28141 : if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1776 20248 : alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1777 :
1778 28141 : page = alloc_slab_page(s, alloc_gfp, node, oo);
1779 28144 : if (unlikely(!page)) {
1780 0 : oo = s->min;
1781 0 : alloc_gfp = flags;
1782 : /*
1783 : * Allocation may have failed due to fragmentation.
1784 : * Try a lower order alloc if possible
1785 : */
1786 0 : page = alloc_slab_page(s, alloc_gfp, node, oo);
1787 0 : if (unlikely(!page))
1788 0 : goto out;
1789 28144 : stat(s, ORDER_FALLBACK);
1790 : }
1791 :
1792 28144 : page->objects = oo_objects(oo);
1793 :
1794 28144 : account_slab_page(page, oo_order(oo), s, flags);
1795 :
1796 28144 : page->slab_cache = s;
1797 28144 : __SetPageSlab(page);
1798 28144 : if (page_is_pfmemalloc(page))
1799 0 : SetPageSlabPfmemalloc(page);
1800 :
1801 28144 : kasan_poison_slab(page);
1802 :
1803 28144 : start = page_address(page);
1804 :
1805 28144 : setup_page_debug(s, page, start);
1806 :
1807 28144 : shuffle = shuffle_freelist(s, page);
1808 :
1809 28144 : if (!shuffle) {
1810 28144 : start = fixup_red_left(s, start);
1811 28144 : start = setup_object(s, page, start);
1812 28144 : page->freelist = start;
1813 487511 : for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1814 459367 : next = p + s->size;
1815 459367 : next = setup_object(s, page, next);
1816 459367 : set_freepointer(s, p, next);
1817 459367 : p = next;
1818 : }
1819 28144 : set_freepointer(s, p, NULL);
1820 : }
1821 :
1822 28144 : page->inuse = page->objects;
1823 28144 : page->frozen = 1;
1824 :
1825 28144 : out:
1826 28144 : if (gfpflags_allow_blocking(flags))
1827 25761 : local_irq_disable();
1828 28144 : if (!page)
1829 : return NULL;
1830 :
1831 28144 : inc_slabs_node(s, page_to_nid(page), page->objects);
1832 :
1833 28144 : return page;
1834 : }
1835 :
1836 28138 : static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1837 : {
1838 28138 : if (unlikely(flags & GFP_SLAB_BUG_MASK))
1839 0 : flags = kmalloc_fix_flags(flags);
1840 :
1841 28138 : return allocate_slab(s,
1842 : flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1843 : }
1844 :
1845 17351 : static void __free_slab(struct kmem_cache *s, struct page *page)
1846 : {
1847 17351 : int order = compound_order(page);
1848 17351 : int pages = 1 << order;
1849 :
1850 17351 : if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
1851 0 : void *p;
1852 :
1853 0 : slab_pad_check(s, page);
1854 0 : for_each_object(p, s, page_address(page),
1855 : page->objects)
1856 0 : check_object(s, page, p, SLUB_RED_INACTIVE);
1857 : }
1858 :
1859 17351 : __ClearPageSlabPfmemalloc(page);
1860 17351 : __ClearPageSlab(page);
1861 : /* In union with page->mapping where page allocator expects NULL */
1862 17351 : page->slab_cache = NULL;
1863 17351 : if (current->reclaim_state)
1864 0 : current->reclaim_state->reclaimed_slab += pages;
1865 17351 : unaccount_slab_page(page, order, s);
1866 17351 : __free_pages(page, order);
1867 17351 : }
1868 :
1869 221 : static void rcu_free_slab(struct rcu_head *h)
1870 : {
1871 221 : struct page *page = container_of(h, struct page, rcu_head);
1872 :
1873 221 : __free_slab(page->slab_cache, page);
1874 221 : }
1875 :
1876 17351 : static void free_slab(struct kmem_cache *s, struct page *page)
1877 : {
1878 17351 : if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1879 221 : call_rcu(&page->rcu_head, rcu_free_slab);
1880 : } else
1881 17130 : __free_slab(s, page);
1882 17351 : }
1883 :
1884 17351 : static void discard_slab(struct kmem_cache *s, struct page *page)
1885 : {
1886 17351 : dec_slabs_node(s, page_to_nid(page), page->objects);
1887 17351 : free_slab(s, page);
1888 17351 : }
1889 :
1890 : /*
1891 : * Management of partially allocated slabs.
1892 : */
1893 : static inline void
1894 213438 : __add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1895 : {
1896 213438 : n->nr_partial++;
1897 213438 : if (tail == DEACTIVATE_TO_TAIL)
1898 213435 : list_add_tail(&page->slab_list, &n->partial);
1899 : else
1900 2 : list_add(&page->slab_list, &n->partial);
1901 : }
1902 :
1903 213440 : static inline void add_partial(struct kmem_cache_node *n,
1904 : struct page *page, int tail)
1905 : {
1906 426877 : lockdep_assert_held(&n->list_lock);
1907 213437 : __add_partial(n, page, tail);
1908 213437 : }
1909 :
1910 212753 : static inline void remove_partial(struct kmem_cache_node *n,
1911 : struct page *page)
1912 : {
1913 425507 : lockdep_assert_held(&n->list_lock);
1914 212753 : list_del(&page->slab_list);
1915 212753 : n->nr_partial--;
1916 212753 : }
1917 :
1918 : /*
1919 : * Remove slab from the partial list, freeze it and
1920 : * return the pointer to the freelist.
1921 : *
1922 : * Returns a list of objects or NULL if it fails.
1923 : */
1924 195401 : static inline void *acquire_slab(struct kmem_cache *s,
1925 : struct kmem_cache_node *n, struct page *page,
1926 : int mode, int *objects)
1927 : {
1928 195401 : void *freelist;
1929 195401 : unsigned long counters;
1930 195401 : struct page new;
1931 :
1932 390799 : lockdep_assert_held(&n->list_lock);
1933 :
1934 : /*
1935 : * Zap the freelist and set the frozen bit.
1936 : * The old freelist is the list of objects for the
1937 : * per cpu allocation list.
1938 : */
1939 195399 : freelist = page->freelist;
1940 195399 : counters = page->counters;
1941 195399 : new.counters = counters;
1942 195399 : *objects = new.objects - new.inuse;
1943 195399 : if (mode) {
1944 195399 : new.inuse = page->objects;
1945 195399 : new.freelist = NULL;
1946 : } else {
1947 : new.freelist = freelist;
1948 : }
1949 :
1950 195399 : VM_BUG_ON(new.frozen);
1951 195399 : new.frozen = 1;
1952 :
1953 195399 : if (!__cmpxchg_double_slab(s, page,
1954 : freelist, counters,
1955 : new.freelist, new.counters,
1956 : "acquire_slab"))
1957 : return NULL;
1958 :
1959 195403 : remove_partial(n, page);
1960 195403 : WARN_ON(!freelist);
1961 : return freelist;
1962 : }
1963 :
1964 : static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1965 : static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1966 :
1967 : /*
1968 : * Try to allocate a partial slab from a specific node.
1969 : */
1970 223498 : static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1971 : struct kmem_cache_cpu *c, gfp_t flags)
1972 : {
1973 223498 : struct page *page, *page2;
1974 223498 : void *object = NULL;
1975 223498 : unsigned int available = 0;
1976 223498 : int objects;
1977 :
1978 : /*
1979 : * Racy check. If we mistakenly see no partial slabs then we
1980 : * just allocate an empty slab. If we mistakenly try to get a
1981 : * partial slab and there is none available then get_partial()
1982 : * will return NULL.
1983 : */
1984 223498 : if (!n || !n->nr_partial)
1985 : return NULL;
1986 :
1987 195363 : spin_lock(&n->list_lock);
1988 195404 : list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
1989 195401 : void *t;
1990 :
1991 195401 : if (!pfmemalloc_match(page, flags))
1992 0 : continue;
1993 :
1994 195400 : t = acquire_slab(s, n, page, object == NULL, &objects);
1995 195400 : if (!t)
1996 : break;
1997 :
1998 195400 : available += objects;
1999 195400 : if (!object) {
2000 195400 : c->page = page;
2001 195400 : stat(s, ALLOC_FROM_PARTIAL);
2002 195400 : object = t;
2003 : } else {
2004 195400 : put_cpu_partial(s, page, 0);
2005 195400 : stat(s, CPU_PARTIAL_NODE);
2006 : }
2007 195400 : if (!kmem_cache_has_cpu_partial(s)
2008 : || available > slub_cpu_partial(s) / 2)
2009 : break;
2010 :
2011 : }
2012 195403 : spin_unlock(&n->list_lock);
2013 195403 : return object;
2014 : }
2015 :
2016 : /*
2017 : * Get a page from somewhere. Search in increasing NUMA distances.
2018 : */
2019 28102 : static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2020 : struct kmem_cache_cpu *c)
2021 : {
2022 : #ifdef CONFIG_NUMA
2023 28102 : struct zonelist *zonelist;
2024 28102 : struct zoneref *z;
2025 28102 : struct zone *zone;
2026 28102 : enum zone_type highest_zoneidx = gfp_zone(flags);
2027 28102 : void *object;
2028 28102 : unsigned int cpuset_mems_cookie;
2029 :
2030 : /*
2031 : * The defrag ratio allows a configuration of the tradeoffs between
2032 : * inter node defragmentation and node local allocations. A lower
2033 : * defrag_ratio increases the tendency to do local allocations
2034 : * instead of attempting to obtain partial slabs from other nodes.
2035 : *
2036 : * If the defrag_ratio is set to 0 then kmalloc() always
2037 : * returns node local objects. If the ratio is higher then kmalloc()
2038 : * may return off node objects because partial slabs are obtained
2039 : * from other nodes and filled up.
2040 : *
2041 : * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
2042 : * (which makes defrag_ratio = 1000) then every (well almost)
2043 : * allocation will first attempt to defrag slab caches on other nodes.
2044 : * This means scanning over all nodes to look for partial slabs which
2045 : * may be expensive if we do it every time we are trying to find a slab
2046 : * with available objects.
2047 : */
2048 28102 : if (!s->remote_node_defrag_ratio ||
2049 28104 : get_cycles() % 1024 > s->remote_node_defrag_ratio)
2050 : return NULL;
2051 :
2052 27511 : do {
2053 27511 : cpuset_mems_cookie = read_mems_allowed_begin();
2054 27511 : zonelist = node_zonelist(mempolicy_slab_node(), flags);
2055 55018 : for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2056 27509 : struct kmem_cache_node *n;
2057 :
2058 27509 : n = get_node(s, zone_to_nid(zone));
2059 :
2060 27509 : if (n && cpuset_zone_allowed(zone, flags) &&
2061 27509 : n->nr_partial > s->min_partial) {
2062 0 : object = get_partial_node(s, n, c, flags);
2063 0 : if (object) {
2064 : /*
2065 : * Don't check read_mems_allowed_retry()
2066 : * here - if mems_allowed was updated in
2067 : * parallel, that was a harmless race
2068 : * between allocation and the cpuset
2069 : * update
2070 : */
2071 0 : return object;
2072 : }
2073 : }
2074 : }
2075 28102 : } while (read_mems_allowed_retry(cpuset_mems_cookie));
2076 : #endif /* CONFIG_NUMA */
2077 : return NULL;
2078 : }
2079 :
2080 : /*
2081 : * Get a partial page, lock it and return it.
2082 : */
2083 223536 : static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2084 : struct kmem_cache_cpu *c)
2085 : {
2086 223536 : void *object;
2087 223536 : int searchnode = node;
2088 :
2089 223536 : if (node == NUMA_NO_NODE)
2090 223501 : searchnode = numa_mem_id();
2091 :
2092 223536 : object = get_partial_node(s, get_node(s, searchnode), c, flags);
2093 223536 : if (object || node != NUMA_NO_NODE)
2094 : return object;
2095 :
2096 28102 : return get_any_partial(s, flags, c);
2097 : }
2098 :
2099 : #ifdef CONFIG_PREEMPTION
2100 : /*
2101 : * Calculate the next globally unique transaction for disambiguation
2102 : * during cmpxchg. The transactions start with the cpu number and are then
2103 : * incremented by CONFIG_NR_CPUS.
2104 : */
2105 : #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2106 : #else
2107 : /*
2108 : * No preemption supported therefore also no need to check for
2109 : * different cpus.
2110 : */
2111 : #define TID_STEP 1
2112 : #endif
2113 :
2114 1488749 : static inline unsigned long next_tid(unsigned long tid)
2115 : {
2116 1488749 : return tid + TID_STEP;
2117 : }
2118 :
2119 : #ifdef SLUB_DEBUG_CMPXCHG
2120 : static inline unsigned int tid_to_cpu(unsigned long tid)
2121 : {
2122 : return tid % TID_STEP;
2123 : }
2124 :
2125 : static inline unsigned long tid_to_event(unsigned long tid)
2126 : {
2127 : return tid / TID_STEP;
2128 : }
2129 : #endif
2130 :
2131 580 : static inline unsigned int init_tid(int cpu)
2132 : {
2133 580 : return cpu;
2134 : }
2135 :
2136 : static inline void note_cmpxchg_failure(const char *n,
2137 : const struct kmem_cache *s, unsigned long tid)
2138 : {
2139 : #ifdef SLUB_DEBUG_CMPXCHG
2140 : unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2141 :
2142 : pr_info("%s %s: cmpxchg redo ", n, s->name);
2143 :
2144 : #ifdef CONFIG_PREEMPTION
2145 : if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2146 : pr_warn("due to cpu change %d -> %d\n",
2147 : tid_to_cpu(tid), tid_to_cpu(actual_tid));
2148 : else
2149 : #endif
2150 : if (tid_to_event(tid) != tid_to_event(actual_tid))
2151 : pr_warn("due to cpu running other code. Event %ld->%ld\n",
2152 : tid_to_event(tid), tid_to_event(actual_tid));
2153 : else
2154 : pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2155 : actual_tid, tid, next_tid(tid));
2156 : #endif
2157 : stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2158 : }
2159 :
2160 145 : static void init_kmem_cache_cpus(struct kmem_cache *s)
2161 : {
2162 145 : int cpu;
2163 :
2164 725 : for_each_possible_cpu(cpu)
2165 580 : per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2166 145 : }
2167 :
2168 : /*
2169 : * Remove the cpu slab
2170 : */
2171 2 : static void deactivate_slab(struct kmem_cache *s, struct page *page,
2172 : void *freelist, struct kmem_cache_cpu *c)
2173 : {
2174 2 : enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2175 2 : struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2176 2 : int lock = 0, free_delta = 0;
2177 2 : enum slab_modes l = M_NONE, m = M_NONE;
2178 2 : void *nextfree, *freelist_iter, *freelist_tail;
2179 2 : int tail = DEACTIVATE_TO_HEAD;
2180 2 : struct page new;
2181 2 : struct page old;
2182 :
2183 2 : if (page->freelist) {
2184 0 : stat(s, DEACTIVATE_REMOTE_FREES);
2185 0 : tail = DEACTIVATE_TO_TAIL;
2186 : }
2187 :
2188 : /*
2189 : * Stage one: Count the objects on cpu's freelist as free_delta and
2190 : * remember the last object in freelist_tail for later splicing.
2191 : */
2192 2 : freelist_tail = NULL;
2193 2 : freelist_iter = freelist;
2194 45 : while (freelist_iter) {
2195 43 : nextfree = get_freepointer(s, freelist_iter);
2196 :
2197 : /*
2198 : * If 'nextfree' is invalid, it is possible that the object at
2199 : * 'freelist_iter' is already corrupted. So isolate all objects
2200 : * starting at 'freelist_iter' by skipping them.
2201 : */
2202 43 : if (freelist_corrupted(s, page, &freelist_iter, nextfree))
2203 : break;
2204 :
2205 43 : freelist_tail = freelist_iter;
2206 43 : free_delta++;
2207 :
2208 43 : freelist_iter = nextfree;
2209 : }
2210 :
2211 : /*
2212 : * Stage two: Unfreeze the page while splicing the per-cpu
2213 : * freelist to the head of page's freelist.
2214 : *
2215 : * Ensure that the page is unfrozen while the list presence
2216 : * reflects the actual number of objects during unfreeze.
2217 : *
2218 : * We setup the list membership and then perform a cmpxchg
2219 : * with the count. If there is a mismatch then the page
2220 : * is not unfrozen but the page is on the wrong list.
2221 : *
2222 : * Then we restart the process which may have to remove
2223 : * the page from the list that we just put it on again
2224 : * because the number of objects in the slab may have
2225 : * changed.
2226 : */
2227 2 : redo:
2228 :
2229 2 : old.freelist = READ_ONCE(page->freelist);
2230 2 : old.counters = READ_ONCE(page->counters);
2231 2 : VM_BUG_ON(!old.frozen);
2232 :
2233 : /* Determine target state of the slab */
2234 2 : new.counters = old.counters;
2235 2 : if (freelist_tail) {
2236 2 : new.inuse -= free_delta;
2237 2 : set_freepointer(s, freelist_tail, old.freelist);
2238 2 : new.freelist = freelist;
2239 : } else
2240 : new.freelist = old.freelist;
2241 :
2242 2 : new.frozen = 0;
2243 :
2244 2 : if (!new.inuse && n->nr_partial >= s->min_partial)
2245 : m = M_FREE;
2246 2 : else if (new.freelist) {
2247 2 : m = M_PARTIAL;
2248 2 : if (!lock) {
2249 2 : lock = 1;
2250 : /*
2251 : * Taking the spinlock removes the possibility
2252 : * that acquire_slab() will see a slab page that
2253 : * is frozen
2254 : */
2255 2 : spin_lock(&n->list_lock);
2256 : }
2257 : } else {
2258 0 : m = M_FULL;
2259 0 : if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
2260 0 : lock = 1;
2261 : /*
2262 : * This also ensures that the scanning of full
2263 : * slabs from diagnostic functions will not see
2264 : * any frozen slabs.
2265 : */
2266 0 : spin_lock(&n->list_lock);
2267 : }
2268 : }
2269 :
2270 2 : if (l != m) {
2271 2 : if (l == M_PARTIAL)
2272 0 : remove_partial(n, page);
2273 2 : else if (l == M_FULL)
2274 0 : remove_full(s, n, page);
2275 :
2276 2 : if (m == M_PARTIAL)
2277 2 : add_partial(n, page, tail);
2278 0 : else if (m == M_FULL)
2279 0 : add_full(s, n, page);
2280 : }
2281 :
2282 2 : l = m;
2283 2 : if (!__cmpxchg_double_slab(s, page,
2284 : old.freelist, old.counters,
2285 : new.freelist, new.counters,
2286 : "unfreezing slab"))
2287 0 : goto redo;
2288 :
2289 2 : if (lock)
2290 2 : spin_unlock(&n->list_lock);
2291 :
2292 2 : if (m == M_PARTIAL)
2293 2 : stat(s, tail);
2294 0 : else if (m == M_FULL)
2295 2 : stat(s, DEACTIVATE_FULL);
2296 0 : else if (m == M_FREE) {
2297 0 : stat(s, DEACTIVATE_EMPTY);
2298 0 : discard_slab(s, page);
2299 0 : stat(s, FREE_SLAB);
2300 : }
2301 :
2302 2 : c->page = NULL;
2303 2 : c->freelist = NULL;
2304 2 : }
2305 :
2306 : /*
2307 : * Unfreeze all the cpu partial slabs.
2308 : *
2309 : * This function must be called with interrupts disabled
2310 : * for the cpu using c (or some other guarantee must be there
2311 : * to guarantee no concurrent accesses).
2312 : */
2313 2 : static void unfreeze_partials(struct kmem_cache *s,
2314 : struct kmem_cache_cpu *c)
2315 : {
2316 : #ifdef CONFIG_SLUB_CPU_PARTIAL
2317 : struct kmem_cache_node *n = NULL, *n2 = NULL;
2318 : struct page *page, *discard_page = NULL;
2319 :
2320 : while ((page = slub_percpu_partial(c))) {
2321 : struct page new;
2322 : struct page old;
2323 :
2324 : slub_set_percpu_partial(c, page);
2325 :
2326 : n2 = get_node(s, page_to_nid(page));
2327 : if (n != n2) {
2328 : if (n)
2329 : spin_unlock(&n->list_lock);
2330 :
2331 : n = n2;
2332 : spin_lock(&n->list_lock);
2333 : }
2334 :
2335 : do {
2336 :
2337 : old.freelist = page->freelist;
2338 : old.counters = page->counters;
2339 : VM_BUG_ON(!old.frozen);
2340 :
2341 : new.counters = old.counters;
2342 : new.freelist = old.freelist;
2343 :
2344 : new.frozen = 0;
2345 :
2346 : } while (!__cmpxchg_double_slab(s, page,
2347 : old.freelist, old.counters,
2348 : new.freelist, new.counters,
2349 : "unfreezing slab"));
2350 :
2351 : if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2352 : page->next = discard_page;
2353 : discard_page = page;
2354 : } else {
2355 : add_partial(n, page, DEACTIVATE_TO_TAIL);
2356 : stat(s, FREE_ADD_PARTIAL);
2357 : }
2358 : }
2359 :
2360 : if (n)
2361 : spin_unlock(&n->list_lock);
2362 :
2363 : while (discard_page) {
2364 : page = discard_page;
2365 : discard_page = discard_page->next;
2366 :
2367 : stat(s, DEACTIVATE_EMPTY);
2368 : discard_slab(s, page);
2369 : stat(s, FREE_SLAB);
2370 : }
2371 : #endif /* CONFIG_SLUB_CPU_PARTIAL */
2372 2 : }
2373 :
2374 : /*
2375 : * Put a page that was just frozen (in __slab_free|get_partial_node) into a
2376 : * partial page slot if available.
2377 : *
2378 : * If we did not find a slot then simply move all the partials to the
2379 : * per node partial list.
2380 : */
2381 : static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2382 : {
2383 : #ifdef CONFIG_SLUB_CPU_PARTIAL
2384 : struct page *oldpage;
2385 : int pages;
2386 : int pobjects;
2387 :
2388 : preempt_disable();
2389 : do {
2390 : pages = 0;
2391 : pobjects = 0;
2392 : oldpage = this_cpu_read(s->cpu_slab->partial);
2393 :
2394 : if (oldpage) {
2395 : pobjects = oldpage->pobjects;
2396 : pages = oldpage->pages;
2397 : if (drain && pobjects > slub_cpu_partial(s)) {
2398 : unsigned long flags;
2399 : /*
2400 : * partial array is full. Move the existing
2401 : * set to the per node partial list.
2402 : */
2403 : local_irq_save(flags);
2404 : unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2405 : local_irq_restore(flags);
2406 : oldpage = NULL;
2407 : pobjects = 0;
2408 : pages = 0;
2409 : stat(s, CPU_PARTIAL_DRAIN);
2410 : }
2411 : }
2412 :
2413 : pages++;
2414 : pobjects += page->objects - page->inuse;
2415 :
2416 : page->pages = pages;
2417 : page->pobjects = pobjects;
2418 : page->next = oldpage;
2419 :
2420 : } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2421 : != oldpage);
2422 : if (unlikely(!slub_cpu_partial(s))) {
2423 : unsigned long flags;
2424 :
2425 : local_irq_save(flags);
2426 : unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2427 : local_irq_restore(flags);
2428 : }
2429 : preempt_enable();
2430 : #endif /* CONFIG_SLUB_CPU_PARTIAL */
2431 : }
2432 :
2433 2 : static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2434 : {
2435 2 : stat(s, CPUSLAB_FLUSH);
2436 2 : deactivate_slab(s, c->page, c->freelist, c);
2437 :
2438 2 : c->tid = next_tid(c->tid);
2439 2 : }
2440 :
2441 : /*
2442 : * Flush cpu slab.
2443 : *
2444 : * Called from IPI handler with interrupts disabled.
2445 : */
2446 2 : static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2447 : {
2448 2 : struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2449 :
2450 2 : if (c->page)
2451 2 : flush_slab(s, c);
2452 :
2453 2 : unfreeze_partials(s, c);
2454 2 : }
2455 :
2456 0 : static void flush_cpu_slab(void *d)
2457 : {
2458 0 : struct kmem_cache *s = d;
2459 :
2460 0 : __flush_cpu_slab(s, smp_processor_id());
2461 0 : }
2462 :
2463 0 : static bool has_cpu_slab(int cpu, void *info)
2464 : {
2465 0 : struct kmem_cache *s = info;
2466 0 : struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2467 :
2468 0 : return c->page || slub_percpu_partial(c);
2469 : }
2470 :
2471 0 : static void flush_all(struct kmem_cache *s)
2472 : {
2473 0 : on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
2474 : }
2475 :
2476 : /*
2477 : * Use the cpu notifier to insure that the cpu slabs are flushed when
2478 : * necessary.
2479 : */
2480 0 : static int slub_cpu_dead(unsigned int cpu)
2481 : {
2482 0 : struct kmem_cache *s;
2483 0 : unsigned long flags;
2484 :
2485 0 : mutex_lock(&slab_mutex);
2486 0 : list_for_each_entry(s, &slab_caches, list) {
2487 0 : local_irq_save(flags);
2488 0 : __flush_cpu_slab(s, cpu);
2489 0 : local_irq_restore(flags);
2490 : }
2491 0 : mutex_unlock(&slab_mutex);
2492 0 : return 0;
2493 : }
2494 :
2495 : /*
2496 : * Check if the objects in a per cpu structure fit numa
2497 : * locality expectations.
2498 : */
2499 1470510 : static inline int node_match(struct page *page, int node)
2500 : {
2501 : #ifdef CONFIG_NUMA
2502 44468 : if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2503 0 : return 0;
2504 : #endif
2505 : return 1;
2506 : }
2507 :
2508 : #ifdef CONFIG_SLUB_DEBUG
2509 0 : static int count_free(struct page *page)
2510 : {
2511 0 : return page->objects - page->inuse;
2512 : }
2513 :
2514 0 : static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2515 : {
2516 0 : return atomic_long_read(&n->total_objects);
2517 : }
2518 : #endif /* CONFIG_SLUB_DEBUG */
2519 :
2520 : #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2521 0 : static unsigned long count_partial(struct kmem_cache_node *n,
2522 : int (*get_count)(struct page *))
2523 : {
2524 0 : unsigned long flags;
2525 0 : unsigned long x = 0;
2526 0 : struct page *page;
2527 :
2528 0 : spin_lock_irqsave(&n->list_lock, flags);
2529 0 : list_for_each_entry(page, &n->partial, slab_list)
2530 0 : x += get_count(page);
2531 0 : spin_unlock_irqrestore(&n->list_lock, flags);
2532 0 : return x;
2533 : }
2534 : #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
2535 :
2536 : static noinline void
2537 0 : slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2538 : {
2539 : #ifdef CONFIG_SLUB_DEBUG
2540 0 : static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2541 : DEFAULT_RATELIMIT_BURST);
2542 0 : int node;
2543 0 : struct kmem_cache_node *n;
2544 :
2545 0 : if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2546 0 : return;
2547 :
2548 0 : pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2549 : nid, gfpflags, &gfpflags);
2550 0 : pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2551 : s->name, s->object_size, s->size, oo_order(s->oo),
2552 : oo_order(s->min));
2553 :
2554 0 : if (oo_order(s->min) > get_order(s->object_size))
2555 0 : pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2556 : s->name);
2557 :
2558 0 : for_each_kmem_cache_node(s, node, n) {
2559 0 : unsigned long nr_slabs;
2560 0 : unsigned long nr_objs;
2561 0 : unsigned long nr_free;
2562 :
2563 0 : nr_free = count_partial(n, count_free);
2564 0 : nr_slabs = node_nr_slabs(n);
2565 0 : nr_objs = node_nr_objs(n);
2566 :
2567 0 : pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2568 : node, nr_slabs, nr_objs, nr_free);
2569 : }
2570 : #endif
2571 : }
2572 :
2573 223532 : static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2574 : int node, struct kmem_cache_cpu **pc)
2575 : {
2576 223532 : void *freelist;
2577 223532 : struct kmem_cache_cpu *c = *pc;
2578 223532 : struct page *page;
2579 :
2580 447064 : WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2581 :
2582 223532 : freelist = get_partial(s, flags, node, c);
2583 :
2584 223530 : if (freelist)
2585 : return freelist;
2586 :
2587 28133 : page = new_slab(s, flags, node);
2588 28135 : if (page) {
2589 28135 : c = raw_cpu_ptr(s->cpu_slab);
2590 28135 : if (c->page)
2591 0 : flush_slab(s, c);
2592 :
2593 : /*
2594 : * No other reference to the page yet so we can
2595 : * muck around with it freely without cmpxchg
2596 : */
2597 28135 : freelist = page->freelist;
2598 28135 : page->freelist = NULL;
2599 :
2600 28135 : stat(s, ALLOC_SLAB);
2601 28135 : c->page = page;
2602 28135 : *pc = c;
2603 : }
2604 :
2605 : return freelist;
2606 : }
2607 :
2608 646319 : static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2609 : {
2610 646319 : if (unlikely(PageSlabPfmemalloc(page)))
2611 0 : return gfp_pfmemalloc_allowed(gfpflags);
2612 :
2613 : return true;
2614 : }
2615 :
2616 : /*
2617 : * Check the page->freelist of a page and either transfer the freelist to the
2618 : * per cpu freelist or deactivate the page.
2619 : *
2620 : * The page is still frozen if the return value is not NULL.
2621 : *
2622 : * If this function returns NULL then the page has been unfrozen.
2623 : *
2624 : * This function must be called with interrupt disabled.
2625 : */
2626 227561 : static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2627 : {
2628 227564 : struct page new;
2629 227564 : unsigned long counters;
2630 227564 : void *freelist;
2631 :
2632 227564 : do {
2633 227564 : freelist = page->freelist;
2634 227564 : counters = page->counters;
2635 :
2636 227564 : new.counters = counters;
2637 227564 : VM_BUG_ON(!new.frozen);
2638 :
2639 227564 : new.inuse = page->objects;
2640 227564 : new.frozen = freelist != NULL;
2641 :
2642 227564 : } while (!__cmpxchg_double_slab(s, page,
2643 : freelist, counters,
2644 : NULL, new.counters,
2645 227604 : "get_freelist"));
2646 :
2647 227601 : return freelist;
2648 : }
2649 :
2650 : /*
2651 : * Slow path. The lockless freelist is empty or we need to perform
2652 : * debugging duties.
2653 : *
2654 : * Processing is still very fast if new objects have been freed to the
2655 : * regular freelist. In that case we simply take over the regular freelist
2656 : * as the lockless freelist and zap the regular freelist.
2657 : *
2658 : * If that is not working then we fall back to the partial lists. We take the
2659 : * first element of the freelist as the object to allocate now and move the
2660 : * rest of the freelist to the lockless freelist.
2661 : *
2662 : * And if we were unable to get a new slab from the partial slab lists then
2663 : * we need to allocate a new slab. This is the slowest path since it involves
2664 : * a call to the page allocator and the setup of a new slab.
2665 : *
2666 : * Version of __slab_alloc to use when we know that interrupts are
2667 : * already disabled (which is the case for bulk allocation).
2668 : */
2669 227919 : static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2670 : unsigned long addr, struct kmem_cache_cpu *c)
2671 : {
2672 227919 : void *freelist;
2673 227919 : struct page *page;
2674 :
2675 227919 : stat(s, ALLOC_SLOWPATH);
2676 :
2677 227919 : page = c->page;
2678 227919 : if (!page) {
2679 : /*
2680 : * if the node is not online or has no normal memory, just
2681 : * ignore the node constraint
2682 : */
2683 378 : if (unlikely(node != NUMA_NO_NODE &&
2684 : !node_isset(node, slab_nodes)))
2685 0 : node = NUMA_NO_NODE;
2686 378 : goto new_slab;
2687 : }
2688 227541 : redo:
2689 :
2690 455082 : if (unlikely(!node_match(page, node))) {
2691 : /*
2692 : * same as above but node_match() being false already
2693 : * implies node != NUMA_NO_NODE
2694 : */
2695 0 : if (!node_isset(node, slab_nodes)) {
2696 0 : node = NUMA_NO_NODE;
2697 0 : goto redo;
2698 : } else {
2699 0 : stat(s, ALLOC_NODE_MISMATCH);
2700 0 : deactivate_slab(s, page, c->freelist, c);
2701 0 : goto new_slab;
2702 : }
2703 : }
2704 :
2705 : /*
2706 : * By rights, we should be searching for a slab page that was
2707 : * PFMEMALLOC but right now, we are losing the pfmemalloc
2708 : * information when the page leaves the per-cpu allocator
2709 : */
2710 227541 : if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2711 0 : deactivate_slab(s, page, c->freelist, c);
2712 0 : goto new_slab;
2713 : }
2714 :
2715 : /* must check again c->freelist in case of cpu migration or IRQ */
2716 227558 : freelist = c->freelist;
2717 227558 : if (freelist)
2718 0 : goto load_freelist;
2719 :
2720 227558 : freelist = get_freelist(s, page);
2721 :
2722 227594 : if (!freelist) {
2723 223152 : c->page = NULL;
2724 223152 : stat(s, DEACTIVATE_BYPASS);
2725 223152 : goto new_slab;
2726 : }
2727 :
2728 227970 : stat(s, ALLOC_REFILL);
2729 :
2730 227970 : load_freelist:
2731 : /*
2732 : * freelist is pointing to the list of objects to be used.
2733 : * page is pointing to the page from which the objects are obtained.
2734 : * That page must be frozen for per cpu allocations to work.
2735 : */
2736 227970 : VM_BUG_ON(!c->page->frozen);
2737 227970 : c->freelist = get_freepointer(s, freelist);
2738 227970 : c->tid = next_tid(c->tid);
2739 227970 : return freelist;
2740 :
2741 223530 : new_slab:
2742 :
2743 223530 : if (slub_percpu_partial(c)) {
2744 : page = c->page = slub_percpu_partial(c);
2745 : slub_set_percpu_partial(c, page);
2746 : stat(s, CPU_PARTIAL_ALLOC);
2747 : goto redo;
2748 : }
2749 :
2750 223530 : freelist = new_slab_objects(s, gfpflags, node, &c);
2751 :
2752 223526 : if (unlikely(!freelist)) {
2753 0 : slab_out_of_memory(s, gfpflags, node);
2754 0 : return NULL;
2755 : }
2756 :
2757 223526 : page = c->page;
2758 223526 : if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2759 223528 : goto load_freelist;
2760 :
2761 : /* Only entered in the debug case */
2762 0 : if (kmem_cache_debug(s) &&
2763 0 : !alloc_debug_processing(s, page, freelist, addr))
2764 0 : goto new_slab; /* Slab failed checks. Next slab needed */
2765 :
2766 0 : deactivate_slab(s, page, get_freepointer(s, freelist), c);
2767 0 : return freelist;
2768 : }
2769 :
2770 : /*
2771 : * Another one that disabled interrupt and compensates for possible
2772 : * cpu changes by refetching the per cpu area pointer.
2773 : */
2774 227894 : static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2775 : unsigned long addr, struct kmem_cache_cpu *c)
2776 : {
2777 227894 : void *p;
2778 227894 : unsigned long flags;
2779 :
2780 455804 : local_irq_save(flags);
2781 : #ifdef CONFIG_PREEMPTION
2782 : /*
2783 : * We may have been preempted and rescheduled on a different
2784 : * cpu before disabling interrupts. Need to reload cpu area
2785 : * pointer.
2786 : */
2787 : c = this_cpu_ptr(s->cpu_slab);
2788 : #endif
2789 :
2790 227879 : p = ___slab_alloc(s, gfpflags, node, addr, c);
2791 227927 : local_irq_restore(flags);
2792 227915 : return p;
2793 : }
2794 :
2795 : /*
2796 : * If the object has been wiped upon free, make sure it's fully initialized by
2797 : * zeroing out freelist pointer.
2798 : */
2799 1471570 : static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
2800 : void *obj)
2801 : {
2802 1471570 : if (unlikely(slab_want_init_on_free(s)) && obj)
2803 0 : memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
2804 : 0, sizeof(void *));
2805 : }
2806 :
2807 : /*
2808 : * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2809 : * have the fastpath folded into their functions. So no function call
2810 : * overhead for requests that can be satisfied on the fastpath.
2811 : *
2812 : * The fastpath works by first checking if the lockless freelist can be used.
2813 : * If not then __slab_alloc is called for slow processing.
2814 : *
2815 : * Otherwise we can simply pick the next object from the lockless free list.
2816 : */
2817 1470321 : static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2818 : gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
2819 : {
2820 1470321 : void *object;
2821 1470321 : struct kmem_cache_cpu *c;
2822 1470321 : struct page *page;
2823 1470321 : unsigned long tid;
2824 1470321 : struct obj_cgroup *objcg = NULL;
2825 :
2826 2940757 : s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
2827 1470436 : if (!s)
2828 : return NULL;
2829 :
2830 1470436 : object = kfence_alloc(s, orig_size, gfpflags);
2831 1470436 : if (unlikely(object))
2832 : goto out;
2833 :
2834 1470436 : redo:
2835 : /*
2836 : * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2837 : * enabled. We may switch back and forth between cpus while
2838 : * reading from one cpu area. That does not matter as long
2839 : * as we end up on the original cpu again when doing the cmpxchg.
2840 : *
2841 : * We should guarantee that tid and kmem_cache are retrieved on
2842 : * the same cpu. It could be different if CONFIG_PREEMPTION so we need
2843 : * to check if it is matched or not.
2844 : */
2845 1470436 : do {
2846 1470436 : tid = this_cpu_read(s->cpu_slab->tid);
2847 1470624 : c = raw_cpu_ptr(s->cpu_slab);
2848 1470663 : } while (IS_ENABLED(CONFIG_PREEMPTION) &&
2849 : unlikely(tid != READ_ONCE(c->tid)));
2850 :
2851 : /*
2852 : * Irqless object alloc/free algorithm used here depends on sequence
2853 : * of fetching cpu_slab's data. tid should be fetched before anything
2854 : * on c to guarantee that object and page associated with previous tid
2855 : * won't be used with current tid. If we fetch tid first, object and
2856 : * page could be one associated with next tid and our alloc/free
2857 : * request will be failed. In this case, we will retry. So, no problem.
2858 : */
2859 1470663 : barrier();
2860 :
2861 : /*
2862 : * The transaction ids are globally unique per cpu and per operation on
2863 : * a per cpu queue. Thus they can be guarantee that the cmpxchg_double
2864 : * occurs on the right processor and that there was no operation on the
2865 : * linked list in between.
2866 : */
2867 :
2868 1470879 : object = c->freelist;
2869 1470879 : page = c->page;
2870 1515318 : if (unlikely(!object || !page || !node_match(page, node))) {
2871 227910 : object = __slab_alloc(s, gfpflags, node, addr, c);
2872 : } else {
2873 1242969 : void *next_object = get_freepointer_safe(s, object);
2874 :
2875 : /*
2876 : * The cmpxchg will only match if there was no additional
2877 : * operation and if we are on the right processor.
2878 : *
2879 : * The cmpxchg does the following atomically (without lock
2880 : * semantics!)
2881 : * 1. Relocate first pointer to the current per cpu area.
2882 : * 2. Verify that tid and freelist have not been changed
2883 : * 3. If they were not changed replace tid and freelist
2884 : *
2885 : * Since this is without lock semantics the protection is only
2886 : * against code executing on this cpu *not* from access by
2887 : * other cpus.
2888 : */
2889 1242969 : if (unlikely(!this_cpu_cmpxchg_double(
2890 : s->cpu_slab->freelist, s->cpu_slab->tid,
2891 : object, tid,
2892 : next_object, next_tid(tid)))) {
2893 :
2894 0 : note_cmpxchg_failure("slab_alloc", s, tid);
2895 0 : goto redo;
2896 : }
2897 1242978 : prefetch_freepointer(s, next_object);
2898 : stat(s, ALLOC_FASTPATH);
2899 : }
2900 :
2901 1470914 : maybe_wipe_obj_freeptr(s, object);
2902 :
2903 1470914 : if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2904 236954 : memset(kasan_reset_tag(object), 0, s->object_size);
2905 :
2906 1233882 : out:
2907 1470764 : slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
2908 :
2909 1471012 : return object;
2910 : }
2911 :
2912 1421994 : static __always_inline void *slab_alloc(struct kmem_cache *s,
2913 : gfp_t gfpflags, unsigned long addr, size_t orig_size)
2914 : {
2915 2844678 : return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
2916 : }
2917 :
2918 1341836 : void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2919 : {
2920 1341836 : void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
2921 :
2922 1342544 : trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2923 1342535 : s->size, gfpflags);
2924 :
2925 1342559 : return ret;
2926 : }
2927 : EXPORT_SYMBOL(kmem_cache_alloc);
2928 :
2929 : #ifdef CONFIG_TRACING
2930 29216 : void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2931 : {
2932 29216 : void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
2933 29220 : trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2934 29220 : ret = kasan_kmalloc(s, ret, size, gfpflags);
2935 29220 : return ret;
2936 : }
2937 : EXPORT_SYMBOL(kmem_cache_alloc_trace);
2938 : #endif
2939 :
2940 : #ifdef CONFIG_NUMA
2941 17268 : void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2942 : {
2943 17268 : void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
2944 :
2945 17268 : trace_kmem_cache_alloc_node(_RET_IP_, ret,
2946 17268 : s->object_size, s->size, gfpflags, node);
2947 :
2948 17268 : return ret;
2949 : }
2950 : EXPORT_SYMBOL(kmem_cache_alloc_node);
2951 :
2952 : #ifdef CONFIG_TRACING
2953 11727 : void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2954 : gfp_t gfpflags,
2955 : int node, size_t size)
2956 : {
2957 11727 : void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
2958 :
2959 11727 : trace_kmalloc_node(_RET_IP_, ret,
2960 11727 : size, s->size, gfpflags, node);
2961 :
2962 11727 : ret = kasan_kmalloc(s, ret, size, gfpflags);
2963 11727 : return ret;
2964 : }
2965 : EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2966 : #endif
2967 : #endif /* CONFIG_NUMA */
2968 :
2969 : /*
2970 : * Slow path handling. This may still be called frequently since objects
2971 : * have a longer lifetime than the cpu slabs in most processing loads.
2972 : *
2973 : * So we still attempt to reduce cache line usage. Just take the slab
2974 : * lock and free the item. If there is no additional partial page
2975 : * handling required then we can return immediately.
2976 : */
2977 1235789 : static void __slab_free(struct kmem_cache *s, struct page *page,
2978 : void *head, void *tail, int cnt,
2979 : unsigned long addr)
2980 :
2981 : {
2982 1235789 : void *prior;
2983 1235789 : int was_frozen;
2984 1235789 : struct page new;
2985 1235789 : unsigned long counters;
2986 1235789 : struct kmem_cache_node *n = NULL;
2987 1235789 : unsigned long flags;
2988 :
2989 1235789 : stat(s, FREE_SLOWPATH);
2990 :
2991 1235789 : if (kfence_free(head))
2992 1218478 : return;
2993 :
2994 1235789 : if (kmem_cache_debug(s) &&
2995 0 : !free_debug_processing(s, page, head, tail, cnt, addr))
2996 : return;
2997 :
2998 1235760 : do {
2999 1235760 : if (unlikely(n)) {
3000 0 : spin_unlock_irqrestore(&n->list_lock, flags);
3001 0 : n = NULL;
3002 : }
3003 1235760 : prior = page->freelist;
3004 1235760 : counters = page->counters;
3005 1235760 : set_freepointer(s, tail, prior);
3006 1235760 : new.counters = counters;
3007 1235760 : was_frozen = new.frozen;
3008 1235760 : new.inuse -= cnt;
3009 1235760 : if ((!new.inuse || !prior) && !was_frozen) {
3010 :
3011 232027 : if (kmem_cache_has_cpu_partial(s) && !prior) {
3012 :
3013 : /*
3014 : * Slab was on no list before and will be
3015 : * partially empty
3016 : * We can defer the list move and instead
3017 : * freeze it.
3018 : */
3019 : new.frozen = 1;
3020 :
3021 : } else { /* Needs to be taken off a list */
3022 :
3023 232027 : n = get_node(s, page_to_nid(page));
3024 : /*
3025 : * Speculatively acquire the list_lock.
3026 : * If the cmpxchg does not succeed then we may
3027 : * drop the list_lock without any processing.
3028 : *
3029 : * Otherwise the list_lock will synchronize with
3030 : * other processors updating the list of slabs.
3031 : */
3032 232027 : spin_lock_irqsave(&n->list_lock, flags);
3033 :
3034 : }
3035 : }
3036 :
3037 1235758 : } while (!cmpxchg_double_slab(s, page,
3038 : prior, counters,
3039 : head, new.counters,
3040 1235808 : "__slab_free"));
3041 :
3042 1235838 : if (likely(!n)) {
3043 :
3044 : if (likely(was_frozen)) {
3045 : /*
3046 : * The list lock was not taken therefore no list
3047 : * activity can be necessary.
3048 : */
3049 1218478 : stat(s, FREE_FROZEN);
3050 : } else if (new.frozen) {
3051 : /*
3052 : * If we just froze the page then put it onto the
3053 : * per cpu partial list.
3054 : */
3055 1218478 : put_cpu_partial(s, page, 1);
3056 1218478 : stat(s, CPU_PARTIAL_FREE);
3057 : }
3058 :
3059 : return;
3060 : }
3061 :
3062 232030 : if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3063 17351 : goto slab_empty;
3064 :
3065 : /*
3066 : * Objects left in the slab. If it was not on the partial list before
3067 : * then add it.
3068 : */
3069 214679 : if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3070 213438 : remove_full(s, n, page);
3071 213438 : add_partial(n, page, DEACTIVATE_TO_TAIL);
3072 213438 : stat(s, FREE_ADD_PARTIAL);
3073 : }
3074 1433154 : spin_unlock_irqrestore(&n->list_lock, flags);
3075 : return;
3076 :
3077 17351 : slab_empty:
3078 17351 : if (prior) {
3079 : /*
3080 : * Slab on the partial list.
3081 : */
3082 17351 : remove_partial(n, page);
3083 17351 : stat(s, FREE_REMOVE_PARTIAL);
3084 : } else {
3085 : /* Slab must be on the full list */
3086 0 : remove_full(s, n, page);
3087 : }
3088 :
3089 17351 : spin_unlock_irqrestore(&n->list_lock, flags);
3090 17351 : stat(s, FREE_SLAB);
3091 17351 : discard_slab(s, page);
3092 : }
3093 :
3094 : /*
3095 : * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
3096 : * can perform fastpath freeing without additional function calls.
3097 : *
3098 : * The fastpath is only possible if we are freeing to the current cpu slab
3099 : * of this processor. This typically the case if we have just allocated
3100 : * the item before.
3101 : *
3102 : * If fastpath is not possible then fall back to __slab_free where we deal
3103 : * with all sorts of special processing.
3104 : *
3105 : * Bulk free of a freelist with several objects (all pointing to the
3106 : * same page) possible by specifying head and tail ptr, plus objects
3107 : * count (cnt). Bulk free indicated by tail pointer being set.
3108 : */
3109 1253478 : static __always_inline void do_slab_free(struct kmem_cache *s,
3110 : struct page *page, void *head, void *tail,
3111 : int cnt, unsigned long addr)
3112 : {
3113 1295571 : void *tail_obj = tail ? : head;
3114 1253478 : struct kmem_cache_cpu *c;
3115 1253478 : unsigned long tid;
3116 :
3117 42093 : memcg_slab_free_hook(s, &head, 1);
3118 1253514 : redo:
3119 : /*
3120 : * Determine the currently cpus per cpu slab.
3121 : * The cpu may change afterward. However that does not matter since
3122 : * data is retrieved via this pointer. If we are on the same cpu
3123 : * during the cmpxchg then the free will succeed.
3124 : */
3125 1253514 : do {
3126 1253514 : tid = this_cpu_read(s->cpu_slab->tid);
3127 1253516 : c = raw_cpu_ptr(s->cpu_slab);
3128 1253518 : } while (IS_ENABLED(CONFIG_PREEMPTION) &&
3129 : unlikely(tid != READ_ONCE(c->tid)));
3130 :
3131 : /* Same with comment on barrier() in slab_alloc_node() */
3132 1253518 : barrier();
3133 :
3134 1253530 : if (likely(page == c->page)) {
3135 17717 : void **freelist = READ_ONCE(c->freelist);
3136 :
3137 17717 : set_freepointer(s, tail_obj, freelist);
3138 :
3139 17717 : if (unlikely(!this_cpu_cmpxchg_double(
3140 : s->cpu_slab->freelist, s->cpu_slab->tid,
3141 : freelist, tid,
3142 : head, next_tid(tid)))) {
3143 :
3144 0 : note_cmpxchg_failure("slab_free", s, tid);
3145 0 : goto redo;
3146 : }
3147 : stat(s, FREE_FASTPATH);
3148 : } else
3149 1235813 : __slab_free(s, page, head, tail_obj, cnt, addr);
3150 :
3151 : }
3152 :
3153 1304784 : static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3154 : void *head, void *tail, int cnt,
3155 : unsigned long addr)
3156 : {
3157 : /*
3158 : * With KASAN enabled slab_free_freelist_hook modifies the freelist
3159 : * to remove objects, whose reuse must be delayed.
3160 : */
3161 1303535 : if (slab_free_freelist_hook(s, &head, &tail))
3162 42093 : do_slab_free(s, page, head, tail, cnt, addr);
3163 : }
3164 :
3165 : #ifdef CONFIG_KASAN_GENERIC
3166 1211385 : void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3167 : {
3168 1211385 : do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3169 1211419 : }
3170 : #endif
3171 :
3172 1202261 : void kmem_cache_free(struct kmem_cache *s, void *x)
3173 : {
3174 1202261 : s = cache_from_obj(s, x);
3175 1202215 : if (!s)
3176 : return;
3177 1202215 : slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3178 1201505 : trace_kmem_cache_free(_RET_IP_, x, s->name);
3179 : }
3180 : EXPORT_SYMBOL(kmem_cache_free);
3181 :
3182 : struct detached_freelist {
3183 : struct page *page;
3184 : void *tail;
3185 : void *freelist;
3186 : int cnt;
3187 : struct kmem_cache *s;
3188 : };
3189 :
3190 : /*
3191 : * This function progressively scans the array with free objects (with
3192 : * a limited look ahead) and extract objects belonging to the same
3193 : * page. It builds a detached freelist directly within the given
3194 : * page/objects. This can happen without any need for
3195 : * synchronization, because the objects are owned by running process.
3196 : * The freelist is build up as a single linked list in the objects.
3197 : * The idea is, that this detached freelist can then be bulk
3198 : * transferred to the real freelist(s), but only requiring a single
3199 : * synchronization primitive. Look ahead in the array is limited due
3200 : * to performance reasons.
3201 : */
3202 : static inline
3203 188 : int build_detached_freelist(struct kmem_cache *s, size_t size,
3204 : void **p, struct detached_freelist *df)
3205 : {
3206 188 : size_t first_skipped_index = 0;
3207 188 : int lookahead = 3;
3208 188 : void *object;
3209 188 : struct page *page;
3210 :
3211 : /* Always re-init detached_freelist */
3212 188 : df->page = NULL;
3213 :
3214 188 : do {
3215 188 : object = p[--size];
3216 : /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
3217 188 : } while (!object && size);
3218 :
3219 188 : if (!object)
3220 : return 0;
3221 :
3222 188 : page = virt_to_head_page(object);
3223 188 : if (!s) {
3224 : /* Handle kalloc'ed objects */
3225 188 : if (unlikely(!PageSlab(page))) {
3226 0 : BUG_ON(!PageCompound(page));
3227 0 : kfree_hook(object);
3228 0 : __free_pages(page, compound_order(page));
3229 0 : p[size] = NULL; /* mark object processed */
3230 0 : return size;
3231 : }
3232 : /* Derive kmem_cache from object */
3233 188 : df->s = page->slab_cache;
3234 : } else {
3235 0 : df->s = cache_from_obj(s, object); /* Support for memcg */
3236 : }
3237 :
3238 188 : if (is_kfence_address(object)) {
3239 : slab_free_hook(df->s, object);
3240 : __kfence_free(object);
3241 : p[size] = NULL; /* mark object processed */
3242 : return size;
3243 : }
3244 :
3245 : /* Start new detached freelist */
3246 188 : df->page = page;
3247 188 : set_freepointer(df->s, object, NULL);
3248 188 : df->tail = object;
3249 188 : df->freelist = object;
3250 188 : p[size] = NULL; /* mark object processed */
3251 188 : df->cnt = 1;
3252 :
3253 316 : while (size) {
3254 134 : object = p[--size];
3255 134 : if (!object)
3256 11 : continue; /* Skip processed objects */
3257 :
3258 : /* df->page is always set at this point */
3259 123 : if (df->page == virt_to_head_page(object)) {
3260 : /* Opportunity build freelist */
3261 64 : set_freepointer(df->s, object, df->freelist);
3262 64 : df->freelist = object;
3263 64 : df->cnt++;
3264 64 : p[size] = NULL; /* mark object processed */
3265 :
3266 64 : continue;
3267 : }
3268 :
3269 : /* Limit look ahead search */
3270 59 : if (!--lookahead)
3271 : break;
3272 :
3273 53 : if (!first_skipped_index)
3274 43 : first_skipped_index = size + 1;
3275 : }
3276 :
3277 188 : return first_skipped_index;
3278 : }
3279 :
3280 : /* Note that interrupts must be enabled when calling this function. */
3281 145 : void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3282 : {
3283 145 : if (WARN_ON(!size))
3284 : return;
3285 :
3286 188 : memcg_slab_free_hook(s, p, size);
3287 188 : do {
3288 188 : struct detached_freelist df;
3289 :
3290 188 : size = build_detached_freelist(s, size, p, &df);
3291 188 : if (!df.page)
3292 0 : continue;
3293 :
3294 376 : slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3295 188 : } while (likely(size));
3296 : }
3297 : EXPORT_SYMBOL(kmem_cache_free_bulk);
3298 :
3299 : /* Note that interrupts must be enabled when calling this function. */
3300 41 : int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3301 : void **p)
3302 : {
3303 41 : struct kmem_cache_cpu *c;
3304 41 : int i;
3305 41 : struct obj_cgroup *objcg = NULL;
3306 :
3307 : /* memcg and kmem_cache debug support */
3308 41 : s = slab_pre_alloc_hook(s, &objcg, size, flags);
3309 41 : if (unlikely(!s))
3310 : return false;
3311 : /*
3312 : * Drain objects in the per cpu slab, while disabling local
3313 : * IRQs, which protects against PREEMPT and interrupts
3314 : * handlers invoking normal fastpath.
3315 : */
3316 41 : local_irq_disable();
3317 41 : c = this_cpu_ptr(s->cpu_slab);
3318 :
3319 697 : for (i = 0; i < size; i++) {
3320 656 : void *object = kfence_alloc(s, s->object_size, flags);
3321 :
3322 656 : if (unlikely(object)) {
3323 : p[i] = object;
3324 : continue;
3325 : }
3326 :
3327 656 : object = c->freelist;
3328 656 : if (unlikely(!object)) {
3329 : /*
3330 : * We may have removed an object from c->freelist using
3331 : * the fastpath in the previous iteration; in that case,
3332 : * c->tid has not been bumped yet.
3333 : * Since ___slab_alloc() may reenable interrupts while
3334 : * allocating memory, we should bump c->tid now.
3335 : */
3336 50 : c->tid = next_tid(c->tid);
3337 :
3338 : /*
3339 : * Invoking slow path likely have side-effect
3340 : * of re-populating per CPU c->freelist
3341 : */
3342 50 : p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3343 50 : _RET_IP_, c);
3344 50 : if (unlikely(!p[i]))
3345 0 : goto error;
3346 :
3347 50 : c = this_cpu_ptr(s->cpu_slab);
3348 50 : maybe_wipe_obj_freeptr(s, p[i]);
3349 :
3350 50 : continue; /* goto for-loop */
3351 : }
3352 606 : c->freelist = get_freepointer(s, object);
3353 606 : p[i] = object;
3354 1262 : maybe_wipe_obj_freeptr(s, p[i]);
3355 : }
3356 41 : c->tid = next_tid(c->tid);
3357 41 : local_irq_enable();
3358 :
3359 : /* Clear memory outside IRQ disabled fastpath loop */
3360 41 : if (unlikely(slab_want_init_on_alloc(flags, s))) {
3361 : int j;
3362 :
3363 0 : for (j = 0; j < i; j++)
3364 0 : memset(kasan_reset_tag(p[j]), 0, s->object_size);
3365 : }
3366 :
3367 : /* memcg and kmem_cache debug support */
3368 41 : slab_post_alloc_hook(s, objcg, flags, size, p);
3369 41 : return i;
3370 0 : error:
3371 0 : local_irq_enable();
3372 0 : slab_post_alloc_hook(s, objcg, flags, i, p);
3373 0 : __kmem_cache_free_bulk(s, i, p);
3374 0 : return 0;
3375 : }
3376 : EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3377 :
3378 :
3379 : /*
3380 : * Object placement in a slab is made very easy because we always start at
3381 : * offset 0. If we tune the size of the object to the alignment then we can
3382 : * get the required alignment by putting one properly sized object after
3383 : * another.
3384 : *
3385 : * Notice that the allocation order determines the sizes of the per cpu
3386 : * caches. Each processor has always one slab available for allocations.
3387 : * Increasing the allocation order reduces the number of times that slabs
3388 : * must be moved on and off the partial lists and is therefore a factor in
3389 : * locking overhead.
3390 : */
3391 :
3392 : /*
3393 : * Mininum / Maximum order of slab pages. This influences locking overhead
3394 : * and slab fragmentation. A higher order reduces the number of partial slabs
3395 : * and increases the number of allocations possible without having to
3396 : * take the list_lock.
3397 : */
3398 : static unsigned int slub_min_order;
3399 : static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3400 : static unsigned int slub_min_objects;
3401 :
3402 : /*
3403 : * Calculate the order of allocation given an slab object size.
3404 : *
3405 : * The order of allocation has significant impact on performance and other
3406 : * system components. Generally order 0 allocations should be preferred since
3407 : * order 0 does not cause fragmentation in the page allocator. Larger objects
3408 : * be problematic to put into order 0 slabs because there may be too much
3409 : * unused space left. We go to a higher order if more than 1/16th of the slab
3410 : * would be wasted.
3411 : *
3412 : * In order to reach satisfactory performance we must ensure that a minimum
3413 : * number of objects is in one slab. Otherwise we may generate too much
3414 : * activity on the partial lists which requires taking the list_lock. This is
3415 : * less a concern for large slabs though which are rarely used.
3416 : *
3417 : * slub_max_order specifies the order where we begin to stop considering the
3418 : * number of objects in a slab as critical. If we reach slub_max_order then
3419 : * we try to keep the page order as low as possible. So we accept more waste
3420 : * of space in favor of a small page order.
3421 : *
3422 : * Higher order allocations also allow the placement of more objects in a
3423 : * slab and thereby reduce object handling overhead. If the user has
3424 : * requested a higher mininum order then we start with that one instead of
3425 : * the smallest order which will fit the object.
3426 : */
3427 150 : static inline unsigned int slab_order(unsigned int size,
3428 : unsigned int min_objects, unsigned int max_order,
3429 : unsigned int fract_leftover)
3430 : {
3431 150 : unsigned int min_order = slub_min_order;
3432 150 : unsigned int order;
3433 :
3434 150 : if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3435 0 : return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3436 :
3437 300 : for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3438 155 : order <= max_order; order++) {
3439 :
3440 150 : unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3441 150 : unsigned int rem;
3442 :
3443 150 : rem = slab_size % size;
3444 :
3445 150 : if (rem <= slab_size / fract_leftover)
3446 : break;
3447 : }
3448 :
3449 : return order;
3450 : }
3451 :
3452 145 : static inline int calculate_order(unsigned int size)
3453 : {
3454 145 : unsigned int order;
3455 145 : unsigned int min_objects;
3456 145 : unsigned int max_objects;
3457 145 : unsigned int nr_cpus;
3458 :
3459 : /*
3460 : * Attempt to find best configuration for a slab. This
3461 : * works by first attempting to generate a layout with
3462 : * the best configuration and backing off gradually.
3463 : *
3464 : * First we increase the acceptable waste in a slab. Then
3465 : * we reduce the minimum objects required in a slab.
3466 : */
3467 145 : min_objects = slub_min_objects;
3468 145 : if (!min_objects) {
3469 : /*
3470 : * Some architectures will only update present cpus when
3471 : * onlining them, so don't trust the number if it's just 1. But
3472 : * we also don't want to use nr_cpu_ids always, as on some other
3473 : * architectures, there can be many possible cpus, but never
3474 : * onlined. Here we compromise between trying to avoid too high
3475 : * order on systems that appear larger than they are, and too
3476 : * low order on systems that appear smaller than they are.
3477 : */
3478 145 : nr_cpus = num_present_cpus();
3479 145 : if (nr_cpus <= 1)
3480 0 : nr_cpus = nr_cpu_ids;
3481 145 : min_objects = 4 * (fls(nr_cpus) + 1);
3482 : }
3483 145 : max_objects = order_objects(slub_max_order, size);
3484 145 : min_objects = min(min_objects, max_objects);
3485 :
3486 145 : while (min_objects > 1) {
3487 : unsigned int fraction;
3488 :
3489 : fraction = 16;
3490 150 : while (fraction >= 4) {
3491 150 : order = slab_order(size, min_objects,
3492 : slub_max_order, fraction);
3493 150 : if (order <= slub_max_order)
3494 145 : return order;
3495 5 : fraction /= 2;
3496 : }
3497 0 : min_objects--;
3498 : }
3499 :
3500 : /*
3501 : * We were unable to place multiple objects in a slab. Now
3502 : * lets see if we can place a single object there.
3503 : */
3504 0 : order = slab_order(size, 1, slub_max_order, 1);
3505 0 : if (order <= slub_max_order)
3506 0 : return order;
3507 :
3508 : /*
3509 : * Doh this slab cannot be placed using slub_max_order.
3510 : */
3511 0 : order = slab_order(size, 1, MAX_ORDER, 1);
3512 0 : if (order < MAX_ORDER)
3513 0 : return order;
3514 : return -ENOSYS;
3515 : }
3516 :
3517 : static void
3518 145 : init_kmem_cache_node(struct kmem_cache_node *n)
3519 : {
3520 145 : n->nr_partial = 0;
3521 145 : spin_lock_init(&n->list_lock);
3522 145 : INIT_LIST_HEAD(&n->partial);
3523 : #ifdef CONFIG_SLUB_DEBUG
3524 145 : atomic_long_set(&n->nr_slabs, 0);
3525 145 : atomic_long_set(&n->total_objects, 0);
3526 145 : INIT_LIST_HEAD(&n->full);
3527 : #endif
3528 145 : }
3529 :
3530 145 : static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3531 : {
3532 145 : BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3533 : KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3534 :
3535 : /*
3536 : * Must align to double word boundary for the double cmpxchg
3537 : * instructions to work; see __pcpu_double_call_return_bool().
3538 : */
3539 145 : s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3540 : 2 * sizeof(void *));
3541 :
3542 145 : if (!s->cpu_slab)
3543 : return 0;
3544 :
3545 145 : init_kmem_cache_cpus(s);
3546 :
3547 145 : return 1;
3548 : }
3549 :
3550 : static struct kmem_cache *kmem_cache_node;
3551 :
3552 : /*
3553 : * No kmalloc_node yet so do it by hand. We know that this is the first
3554 : * slab on the node for this slabcache. There are no concurrent accesses
3555 : * possible.
3556 : *
3557 : * Note that this function only works on the kmem_cache_node
3558 : * when allocating for the kmem_cache_node. This is used for bootstrapping
3559 : * memory on a fresh node that has no slab structures yet.
3560 : */
3561 1 : static void early_kmem_cache_node_alloc(int node)
3562 : {
3563 1 : struct page *page;
3564 1 : struct kmem_cache_node *n;
3565 :
3566 1 : BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3567 :
3568 1 : page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3569 :
3570 1 : BUG_ON(!page);
3571 1 : if (page_to_nid(page) != node) {
3572 0 : pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3573 0 : pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3574 : }
3575 :
3576 1 : n = page->freelist;
3577 1 : BUG_ON(!n);
3578 : #ifdef CONFIG_SLUB_DEBUG
3579 1 : init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3580 1 : init_tracking(kmem_cache_node, n);
3581 : #endif
3582 1 : n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL);
3583 1 : page->freelist = get_freepointer(kmem_cache_node, n);
3584 1 : page->inuse = 1;
3585 1 : page->frozen = 0;
3586 1 : kmem_cache_node->node[node] = n;
3587 1 : init_kmem_cache_node(n);
3588 1 : inc_slabs_node(kmem_cache_node, node, page->objects);
3589 :
3590 : /*
3591 : * No locks need to be taken here as it has just been
3592 : * initialized and there is no concurrent access.
3593 : */
3594 1 : __add_partial(n, page, DEACTIVATE_TO_HEAD);
3595 1 : }
3596 :
3597 0 : static void free_kmem_cache_nodes(struct kmem_cache *s)
3598 : {
3599 0 : int node;
3600 0 : struct kmem_cache_node *n;
3601 :
3602 0 : for_each_kmem_cache_node(s, node, n) {
3603 0 : s->node[node] = NULL;
3604 0 : kmem_cache_free(kmem_cache_node, n);
3605 : }
3606 0 : }
3607 :
3608 0 : void __kmem_cache_release(struct kmem_cache *s)
3609 : {
3610 0 : cache_random_seq_destroy(s);
3611 0 : free_percpu(s->cpu_slab);
3612 0 : free_kmem_cache_nodes(s);
3613 0 : }
3614 :
3615 145 : static int init_kmem_cache_nodes(struct kmem_cache *s)
3616 : {
3617 145 : int node;
3618 :
3619 290 : for_each_node_mask(node, slab_nodes) {
3620 145 : struct kmem_cache_node *n;
3621 :
3622 145 : if (slab_state == DOWN) {
3623 1 : early_kmem_cache_node_alloc(node);
3624 1 : continue;
3625 : }
3626 144 : n = kmem_cache_alloc_node(kmem_cache_node,
3627 : GFP_KERNEL, node);
3628 :
3629 144 : if (!n) {
3630 0 : free_kmem_cache_nodes(s);
3631 0 : return 0;
3632 : }
3633 :
3634 144 : init_kmem_cache_node(n);
3635 144 : s->node[node] = n;
3636 : }
3637 : return 1;
3638 : }
3639 :
3640 145 : static void set_min_partial(struct kmem_cache *s, unsigned long min)
3641 : {
3642 145 : if (min < MIN_PARTIAL)
3643 : min = MIN_PARTIAL;
3644 : else if (min > MAX_PARTIAL)
3645 : min = MAX_PARTIAL;
3646 145 : s->min_partial = min;
3647 : }
3648 :
3649 145 : static void set_cpu_partial(struct kmem_cache *s)
3650 : {
3651 : #ifdef CONFIG_SLUB_CPU_PARTIAL
3652 : /*
3653 : * cpu_partial determined the maximum number of objects kept in the
3654 : * per cpu partial lists of a processor.
3655 : *
3656 : * Per cpu partial lists mainly contain slabs that just have one
3657 : * object freed. If they are used for allocation then they can be
3658 : * filled up again with minimal effort. The slab will never hit the
3659 : * per node partial lists and therefore no locking will be required.
3660 : *
3661 : * This setting also determines
3662 : *
3663 : * A) The number of objects from per cpu partial slabs dumped to the
3664 : * per node list when we reach the limit.
3665 : * B) The number of objects in cpu partial slabs to extract from the
3666 : * per node list when we run out of per cpu objects. We only fetch
3667 : * 50% to keep some capacity around for frees.
3668 : */
3669 : if (!kmem_cache_has_cpu_partial(s))
3670 : slub_set_cpu_partial(s, 0);
3671 : else if (s->size >= PAGE_SIZE)
3672 : slub_set_cpu_partial(s, 2);
3673 : else if (s->size >= 1024)
3674 : slub_set_cpu_partial(s, 6);
3675 : else if (s->size >= 256)
3676 : slub_set_cpu_partial(s, 13);
3677 : else
3678 : slub_set_cpu_partial(s, 30);
3679 : #endif
3680 145 : }
3681 :
3682 : /*
3683 : * calculate_sizes() determines the order and the distribution of data within
3684 : * a slab object.
3685 : */
3686 145 : static int calculate_sizes(struct kmem_cache *s, int forced_order)
3687 : {
3688 145 : slab_flags_t flags = s->flags;
3689 145 : unsigned int size = s->object_size;
3690 145 : unsigned int freepointer_area;
3691 145 : unsigned int order;
3692 :
3693 : /*
3694 : * Round up object size to the next word boundary. We can only
3695 : * place the free pointer at word boundaries and this determines
3696 : * the possible location of the free pointer.
3697 : */
3698 145 : size = ALIGN(size, sizeof(void *));
3699 : /*
3700 : * This is the area of the object where a freepointer can be
3701 : * safely written. If redzoning adds more to the inuse size, we
3702 : * can't use that portion for writing the freepointer, so
3703 : * s->offset must be limited within this for the general case.
3704 : */
3705 145 : freepointer_area = size;
3706 :
3707 : #ifdef CONFIG_SLUB_DEBUG
3708 : /*
3709 : * Determine if we can poison the object itself. If the user of
3710 : * the slab may touch the object after free or before allocation
3711 : * then we should never poison the object itself.
3712 : */
3713 145 : if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3714 0 : !s->ctor)
3715 0 : s->flags |= __OBJECT_POISON;
3716 : else
3717 145 : s->flags &= ~__OBJECT_POISON;
3718 :
3719 :
3720 : /*
3721 : * If we are Redzoning then check if there is some space between the
3722 : * end of the object and the free pointer. If not then add an
3723 : * additional word to have some bytes to store Redzone information.
3724 : */
3725 145 : if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3726 0 : size += sizeof(void *);
3727 : #endif
3728 :
3729 : /*
3730 : * With that we have determined the number of bytes in actual use
3731 : * by the object. This is the potential offset to the free pointer.
3732 : */
3733 145 : s->inuse = size;
3734 :
3735 145 : if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3736 138 : s->ctor)) {
3737 : /*
3738 : * Relocate free pointer after the object if it is not
3739 : * permitted to overwrite the first word of the object on
3740 : * kmem_cache_free.
3741 : *
3742 : * This is the case if we do RCU, have a constructor or
3743 : * destructor or are poisoning the objects.
3744 : *
3745 : * The assumption that s->offset >= s->inuse means free
3746 : * pointer is outside of the object is used in the
3747 : * freeptr_outside_object() function. If that is no
3748 : * longer true, the function needs to be modified.
3749 : */
3750 17 : s->offset = size;
3751 17 : size += sizeof(void *);
3752 128 : } else if (freepointer_area > sizeof(void *)) {
3753 : /*
3754 : * Store freelist pointer near middle of object to keep
3755 : * it away from the edges of the object to avoid small
3756 : * sized over/underflows from neighboring allocations.
3757 : */
3758 126 : s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
3759 : }
3760 :
3761 : #ifdef CONFIG_SLUB_DEBUG
3762 145 : if (flags & SLAB_STORE_USER)
3763 : /*
3764 : * Need to store information about allocs and frees after
3765 : * the object.
3766 : */
3767 0 : size += 2 * sizeof(struct track);
3768 : #endif
3769 :
3770 145 : kasan_cache_create(s, &size, &s->flags);
3771 : #ifdef CONFIG_SLUB_DEBUG
3772 145 : if (flags & SLAB_RED_ZONE) {
3773 : /*
3774 : * Add some empty padding so that we can catch
3775 : * overwrites from earlier objects rather than let
3776 : * tracking information or the free pointer be
3777 : * corrupted if a user writes before the start
3778 : * of the object.
3779 : */
3780 0 : size += sizeof(void *);
3781 :
3782 0 : s->red_left_pad = sizeof(void *);
3783 0 : s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3784 0 : size += s->red_left_pad;
3785 : }
3786 : #endif
3787 :
3788 : /*
3789 : * SLUB stores one object immediately after another beginning from
3790 : * offset 0. In order to align the objects we have to simply size
3791 : * each object to conform to the alignment.
3792 : */
3793 145 : size = ALIGN(size, s->align);
3794 145 : s->size = size;
3795 145 : s->reciprocal_size = reciprocal_value(size);
3796 145 : if (forced_order >= 0)
3797 0 : order = forced_order;
3798 : else
3799 145 : order = calculate_order(size);
3800 :
3801 145 : if ((int)order < 0)
3802 : return 0;
3803 :
3804 145 : s->allocflags = 0;
3805 145 : if (order)
3806 68 : s->allocflags |= __GFP_COMP;
3807 :
3808 145 : if (s->flags & SLAB_CACHE_DMA)
3809 0 : s->allocflags |= GFP_DMA;
3810 :
3811 145 : if (s->flags & SLAB_CACHE_DMA32)
3812 0 : s->allocflags |= GFP_DMA32;
3813 :
3814 145 : if (s->flags & SLAB_RECLAIM_ACCOUNT)
3815 37 : s->allocflags |= __GFP_RECLAIMABLE;
3816 :
3817 : /*
3818 : * Determine the number of objects per slab
3819 : */
3820 145 : s->oo = oo_make(order, size);
3821 145 : s->min = oo_make(get_order(size), size);
3822 145 : if (oo_objects(s->oo) > oo_objects(s->max))
3823 145 : s->max = s->oo;
3824 :
3825 145 : return !!oo_objects(s->oo);
3826 : }
3827 :
3828 145 : static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3829 : {
3830 145 : s->flags = kmem_cache_flags(s->size, flags, s->name);
3831 : #ifdef CONFIG_SLAB_FREELIST_HARDENED
3832 : s->random = get_random_long();
3833 : #endif
3834 :
3835 145 : if (!calculate_sizes(s, -1))
3836 0 : goto error;
3837 145 : if (disable_higher_order_debug) {
3838 : /*
3839 : * Disable debugging flags that store metadata if the min slab
3840 : * order increased.
3841 : */
3842 0 : if (get_order(s->size) > get_order(s->object_size)) {
3843 0 : s->flags &= ~DEBUG_METADATA_FLAGS;
3844 0 : s->offset = 0;
3845 0 : if (!calculate_sizes(s, -1))
3846 0 : goto error;
3847 : }
3848 : }
3849 :
3850 : #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3851 : defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3852 145 : if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3853 : /* Enable fast mode */
3854 145 : s->flags |= __CMPXCHG_DOUBLE;
3855 : #endif
3856 :
3857 : /*
3858 : * The larger the object size is, the more pages we want on the partial
3859 : * list to avoid pounding the page allocator excessively.
3860 : */
3861 145 : set_min_partial(s, ilog2(s->size) / 2);
3862 :
3863 145 : set_cpu_partial(s);
3864 :
3865 : #ifdef CONFIG_NUMA
3866 145 : s->remote_node_defrag_ratio = 1000;
3867 : #endif
3868 :
3869 : /* Initialize the pre-computed randomized freelist if slab is up */
3870 145 : if (slab_state >= UP) {
3871 145 : if (init_cache_random_seq(s))
3872 : goto error;
3873 : }
3874 :
3875 145 : if (!init_kmem_cache_nodes(s))
3876 0 : goto error;
3877 :
3878 145 : if (alloc_kmem_cache_cpus(s))
3879 : return 0;
3880 :
3881 0 : free_kmem_cache_nodes(s);
3882 : error:
3883 : return -EINVAL;
3884 : }
3885 :
3886 0 : static void list_slab_objects(struct kmem_cache *s, struct page *page,
3887 : const char *text)
3888 : {
3889 : #ifdef CONFIG_SLUB_DEBUG
3890 0 : void *addr = page_address(page);
3891 0 : unsigned long *map;
3892 0 : void *p;
3893 :
3894 0 : slab_err(s, page, text, s->name);
3895 0 : slab_lock(page);
3896 :
3897 0 : map = get_map(s, page);
3898 0 : for_each_object(p, s, addr, page->objects) {
3899 :
3900 0 : if (!test_bit(__obj_to_index(s, addr, p), map)) {
3901 0 : pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3902 0 : print_tracking(s, p);
3903 : }
3904 : }
3905 0 : put_map(map);
3906 0 : slab_unlock(page);
3907 : #endif
3908 0 : }
3909 :
3910 : /*
3911 : * Attempt to free all partial slabs on a node.
3912 : * This is called from __kmem_cache_shutdown(). We must take list_lock
3913 : * because sysfs file might still access partial list after the shutdowning.
3914 : */
3915 0 : static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3916 : {
3917 0 : LIST_HEAD(discard);
3918 0 : struct page *page, *h;
3919 :
3920 0 : BUG_ON(irqs_disabled());
3921 0 : spin_lock_irq(&n->list_lock);
3922 0 : list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3923 0 : if (!page->inuse) {
3924 0 : remove_partial(n, page);
3925 0 : list_add(&page->slab_list, &discard);
3926 : } else {
3927 0 : list_slab_objects(s, page,
3928 : "Objects remaining in %s on __kmem_cache_shutdown()");
3929 : }
3930 : }
3931 0 : spin_unlock_irq(&n->list_lock);
3932 :
3933 0 : list_for_each_entry_safe(page, h, &discard, slab_list)
3934 0 : discard_slab(s, page);
3935 0 : }
3936 :
3937 0 : bool __kmem_cache_empty(struct kmem_cache *s)
3938 : {
3939 0 : int node;
3940 0 : struct kmem_cache_node *n;
3941 :
3942 0 : for_each_kmem_cache_node(s, node, n)
3943 0 : if (n->nr_partial || slabs_node(s, node))
3944 : return false;
3945 : return true;
3946 : }
3947 :
3948 : /*
3949 : * Release all resources used by a slab cache.
3950 : */
3951 0 : int __kmem_cache_shutdown(struct kmem_cache *s)
3952 : {
3953 0 : int node;
3954 0 : struct kmem_cache_node *n;
3955 :
3956 0 : flush_all(s);
3957 : /* Attempt to free all objects */
3958 0 : for_each_kmem_cache_node(s, node, n) {
3959 0 : free_partial(s, n);
3960 0 : if (n->nr_partial || slabs_node(s, node))
3961 : return 1;
3962 : }
3963 : return 0;
3964 : }
3965 :
3966 0 : void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
3967 : {
3968 0 : void *base;
3969 0 : int __maybe_unused i;
3970 0 : unsigned int objnr;
3971 0 : void *objp;
3972 0 : void *objp0;
3973 0 : struct kmem_cache *s = page->slab_cache;
3974 0 : struct track __maybe_unused *trackp;
3975 :
3976 0 : kpp->kp_ptr = object;
3977 0 : kpp->kp_page = page;
3978 0 : kpp->kp_slab_cache = s;
3979 0 : base = page_address(page);
3980 0 : objp0 = kasan_reset_tag(object);
3981 : #ifdef CONFIG_SLUB_DEBUG
3982 0 : objp = restore_red_left(s, objp0);
3983 : #else
3984 : objp = objp0;
3985 : #endif
3986 0 : objnr = obj_to_index(s, page, objp);
3987 0 : kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
3988 0 : objp = base + s->size * objnr;
3989 0 : kpp->kp_objp = objp;
3990 0 : if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
3991 0 : !(s->flags & SLAB_STORE_USER))
3992 : return;
3993 : #ifdef CONFIG_SLUB_DEBUG
3994 0 : trackp = get_track(s, objp, TRACK_ALLOC);
3995 0 : kpp->kp_ret = (void *)trackp->addr;
3996 : #ifdef CONFIG_STACKTRACE
3997 0 : for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
3998 0 : kpp->kp_stack[i] = (void *)trackp->addrs[i];
3999 0 : if (!kpp->kp_stack[i])
4000 : break;
4001 : }
4002 : #endif
4003 : #endif
4004 : }
4005 :
4006 : /********************************************************************
4007 : * Kmalloc subsystem
4008 : *******************************************************************/
4009 :
4010 0 : static int __init setup_slub_min_order(char *str)
4011 : {
4012 0 : get_option(&str, (int *)&slub_min_order);
4013 :
4014 0 : return 1;
4015 : }
4016 :
4017 : __setup("slub_min_order=", setup_slub_min_order);
4018 :
4019 0 : static int __init setup_slub_max_order(char *str)
4020 : {
4021 0 : get_option(&str, (int *)&slub_max_order);
4022 0 : slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
4023 :
4024 0 : return 1;
4025 : }
4026 :
4027 : __setup("slub_max_order=", setup_slub_max_order);
4028 :
4029 0 : static int __init setup_slub_min_objects(char *str)
4030 : {
4031 0 : get_option(&str, (int *)&slub_min_objects);
4032 :
4033 0 : return 1;
4034 : }
4035 :
4036 : __setup("slub_min_objects=", setup_slub_min_objects);
4037 :
4038 42689 : void *__kmalloc(size_t size, gfp_t flags)
4039 : {
4040 42689 : struct kmem_cache *s;
4041 42689 : void *ret;
4042 :
4043 42689 : if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4044 8 : return kmalloc_large(size, flags);
4045 :
4046 42685 : s = kmalloc_slab(size, flags);
4047 :
4048 42685 : if (unlikely(ZERO_OR_NULL_PTR(s)))
4049 : return s;
4050 :
4051 42685 : ret = slab_alloc(s, flags, _RET_IP_, size);
4052 :
4053 42685 : trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
4054 :
4055 42685 : ret = kasan_kmalloc(s, ret, size, flags);
4056 :
4057 42685 : return ret;
4058 : }
4059 : EXPORT_SYMBOL(__kmalloc);
4060 :
4061 : #ifdef CONFIG_NUMA
4062 2 : static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4063 : {
4064 2 : struct page *page;
4065 2 : void *ptr = NULL;
4066 2 : unsigned int order = get_order(size);
4067 :
4068 2 : flags |= __GFP_COMP;
4069 2 : page = alloc_pages_node(node, flags, order);
4070 2 : if (page) {
4071 2 : ptr = page_address(page);
4072 2 : mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4073 2 : PAGE_SIZE << order);
4074 : }
4075 :
4076 2 : return kmalloc_large_node_hook(ptr, size, flags);
4077 : }
4078 :
4079 14121 : void *__kmalloc_node(size_t size, gfp_t flags, int node)
4080 : {
4081 14121 : struct kmem_cache *s;
4082 14121 : void *ret;
4083 :
4084 14121 : if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4085 2 : ret = kmalloc_large_node(size, flags, node);
4086 :
4087 2 : trace_kmalloc_node(_RET_IP_, ret,
4088 2 : size, PAGE_SIZE << get_order(size),
4089 : flags, node);
4090 :
4091 2 : return ret;
4092 : }
4093 :
4094 14119 : s = kmalloc_slab(size, flags);
4095 :
4096 14119 : if (unlikely(ZERO_OR_NULL_PTR(s)))
4097 : return s;
4098 :
4099 14119 : ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
4100 :
4101 14120 : trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
4102 :
4103 14120 : ret = kasan_kmalloc(s, ret, size, flags);
4104 :
4105 14120 : return ret;
4106 : }
4107 : EXPORT_SYMBOL(__kmalloc_node);
4108 : #endif /* CONFIG_NUMA */
4109 :
4110 : #ifdef CONFIG_HARDENED_USERCOPY
4111 : /*
4112 : * Rejects incorrectly sized objects and objects that are to be copied
4113 : * to/from userspace but do not fall entirely within the containing slab
4114 : * cache's usercopy region.
4115 : *
4116 : * Returns NULL if check passes, otherwise const char * to name of cache
4117 : * to indicate an error.
4118 : */
4119 : void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4120 : bool to_user)
4121 : {
4122 : struct kmem_cache *s;
4123 : unsigned int offset;
4124 : size_t object_size;
4125 : bool is_kfence = is_kfence_address(ptr);
4126 :
4127 : ptr = kasan_reset_tag(ptr);
4128 :
4129 : /* Find object and usable object size. */
4130 : s = page->slab_cache;
4131 :
4132 : /* Reject impossible pointers. */
4133 : if (ptr < page_address(page))
4134 : usercopy_abort("SLUB object not in SLUB page?!", NULL,
4135 : to_user, 0, n);
4136 :
4137 : /* Find offset within object. */
4138 : if (is_kfence)
4139 : offset = ptr - kfence_object_start(ptr);
4140 : else
4141 : offset = (ptr - page_address(page)) % s->size;
4142 :
4143 : /* Adjust for redzone and reject if within the redzone. */
4144 : if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4145 : if (offset < s->red_left_pad)
4146 : usercopy_abort("SLUB object in left red zone",
4147 : s->name, to_user, offset, n);
4148 : offset -= s->red_left_pad;
4149 : }
4150 :
4151 : /* Allow address range falling entirely within usercopy region. */
4152 : if (offset >= s->useroffset &&
4153 : offset - s->useroffset <= s->usersize &&
4154 : n <= s->useroffset - offset + s->usersize)
4155 : return;
4156 :
4157 : /*
4158 : * If the copy is still within the allocated object, produce
4159 : * a warning instead of rejecting the copy. This is intended
4160 : * to be a temporary method to find any missing usercopy
4161 : * whitelists.
4162 : */
4163 : object_size = slab_ksize(s);
4164 : if (usercopy_fallback &&
4165 : offset <= object_size && n <= object_size - offset) {
4166 : usercopy_warn("SLUB object", s->name, to_user, offset, n);
4167 : return;
4168 : }
4169 :
4170 : usercopy_abort("SLUB object", s->name, to_user, offset, n);
4171 : }
4172 : #endif /* CONFIG_HARDENED_USERCOPY */
4173 :
4174 10255 : size_t __ksize(const void *object)
4175 : {
4176 10255 : struct page *page;
4177 :
4178 10255 : if (unlikely(object == ZERO_SIZE_PTR))
4179 : return 0;
4180 :
4181 10255 : page = virt_to_head_page(object);
4182 :
4183 10255 : if (unlikely(!PageSlab(page))) {
4184 0 : WARN_ON(!PageCompound(page));
4185 0 : return page_size(page);
4186 : }
4187 :
4188 10255 : return slab_ksize(page->slab_cache);
4189 : }
4190 : EXPORT_SYMBOL(__ksize);
4191 :
4192 120171 : void kfree(const void *x)
4193 : {
4194 120171 : struct page *page;
4195 120171 : void *object = (void *)x;
4196 :
4197 120171 : trace_kfree(_RET_IP_, x);
4198 :
4199 120180 : if (unlikely(ZERO_OR_NULL_PTR(x)))
4200 : return;
4201 :
4202 101868 : page = virt_to_head_page(x);
4203 101870 : if (unlikely(!PageSlab(page))) {
4204 16 : unsigned int order = compound_order(page);
4205 :
4206 16 : BUG_ON(!PageCompound(page));
4207 16 : kfree_hook(object);
4208 16 : mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4209 16 : -(PAGE_SIZE << order));
4210 16 : __free_pages(page, order);
4211 16 : return;
4212 : }
4213 203698 : slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
4214 : }
4215 : EXPORT_SYMBOL(kfree);
4216 :
4217 : #define SHRINK_PROMOTE_MAX 32
4218 :
4219 : /*
4220 : * kmem_cache_shrink discards empty slabs and promotes the slabs filled
4221 : * up most to the head of the partial lists. New allocations will then
4222 : * fill those up and thus they can be removed from the partial lists.
4223 : *
4224 : * The slabs with the least items are placed last. This results in them
4225 : * being allocated from last increasing the chance that the last objects
4226 : * are freed in them.
4227 : */
4228 0 : int __kmem_cache_shrink(struct kmem_cache *s)
4229 : {
4230 0 : int node;
4231 0 : int i;
4232 0 : struct kmem_cache_node *n;
4233 0 : struct page *page;
4234 0 : struct page *t;
4235 0 : struct list_head discard;
4236 0 : struct list_head promote[SHRINK_PROMOTE_MAX];
4237 0 : unsigned long flags;
4238 0 : int ret = 0;
4239 :
4240 0 : flush_all(s);
4241 0 : for_each_kmem_cache_node(s, node, n) {
4242 0 : INIT_LIST_HEAD(&discard);
4243 0 : for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4244 0 : INIT_LIST_HEAD(promote + i);
4245 :
4246 0 : spin_lock_irqsave(&n->list_lock, flags);
4247 :
4248 : /*
4249 : * Build lists of slabs to discard or promote.
4250 : *
4251 : * Note that concurrent frees may occur while we hold the
4252 : * list_lock. page->inuse here is the upper limit.
4253 : */
4254 0 : list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4255 0 : int free = page->objects - page->inuse;
4256 :
4257 : /* Do not reread page->inuse */
4258 0 : barrier();
4259 :
4260 : /* We do not keep full slabs on the list */
4261 0 : BUG_ON(free <= 0);
4262 :
4263 0 : if (free == page->objects) {
4264 0 : list_move(&page->slab_list, &discard);
4265 0 : n->nr_partial--;
4266 0 : } else if (free <= SHRINK_PROMOTE_MAX)
4267 0 : list_move(&page->slab_list, promote + free - 1);
4268 : }
4269 :
4270 : /*
4271 : * Promote the slabs filled up most to the head of the
4272 : * partial list.
4273 : */
4274 0 : for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4275 0 : list_splice(promote + i, &n->partial);
4276 :
4277 0 : spin_unlock_irqrestore(&n->list_lock, flags);
4278 :
4279 : /* Release empty slabs */
4280 0 : list_for_each_entry_safe(page, t, &discard, slab_list)
4281 0 : discard_slab(s, page);
4282 :
4283 0 : if (slabs_node(s, node))
4284 0 : ret = 1;
4285 : }
4286 :
4287 0 : return ret;
4288 : }
4289 :
4290 : static int slab_mem_going_offline_callback(void *arg)
4291 : {
4292 : struct kmem_cache *s;
4293 :
4294 : mutex_lock(&slab_mutex);
4295 : list_for_each_entry(s, &slab_caches, list)
4296 : __kmem_cache_shrink(s);
4297 : mutex_unlock(&slab_mutex);
4298 :
4299 : return 0;
4300 : }
4301 :
4302 : static void slab_mem_offline_callback(void *arg)
4303 : {
4304 : struct memory_notify *marg = arg;
4305 : int offline_node;
4306 :
4307 : offline_node = marg->status_change_nid_normal;
4308 :
4309 : /*
4310 : * If the node still has available memory. we need kmem_cache_node
4311 : * for it yet.
4312 : */
4313 : if (offline_node < 0)
4314 : return;
4315 :
4316 : mutex_lock(&slab_mutex);
4317 : node_clear(offline_node, slab_nodes);
4318 : /*
4319 : * We no longer free kmem_cache_node structures here, as it would be
4320 : * racy with all get_node() users, and infeasible to protect them with
4321 : * slab_mutex.
4322 : */
4323 : mutex_unlock(&slab_mutex);
4324 : }
4325 :
4326 : static int slab_mem_going_online_callback(void *arg)
4327 : {
4328 : struct kmem_cache_node *n;
4329 : struct kmem_cache *s;
4330 : struct memory_notify *marg = arg;
4331 : int nid = marg->status_change_nid_normal;
4332 : int ret = 0;
4333 :
4334 : /*
4335 : * If the node's memory is already available, then kmem_cache_node is
4336 : * already created. Nothing to do.
4337 : */
4338 : if (nid < 0)
4339 : return 0;
4340 :
4341 : /*
4342 : * We are bringing a node online. No memory is available yet. We must
4343 : * allocate a kmem_cache_node structure in order to bring the node
4344 : * online.
4345 : */
4346 : mutex_lock(&slab_mutex);
4347 : list_for_each_entry(s, &slab_caches, list) {
4348 : /*
4349 : * The structure may already exist if the node was previously
4350 : * onlined and offlined.
4351 : */
4352 : if (get_node(s, nid))
4353 : continue;
4354 : /*
4355 : * XXX: kmem_cache_alloc_node will fallback to other nodes
4356 : * since memory is not yet available from the node that
4357 : * is brought up.
4358 : */
4359 : n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4360 : if (!n) {
4361 : ret = -ENOMEM;
4362 : goto out;
4363 : }
4364 : init_kmem_cache_node(n);
4365 : s->node[nid] = n;
4366 : }
4367 : /*
4368 : * Any cache created after this point will also have kmem_cache_node
4369 : * initialized for the new node.
4370 : */
4371 : node_set(nid, slab_nodes);
4372 : out:
4373 : mutex_unlock(&slab_mutex);
4374 : return ret;
4375 : }
4376 :
4377 : static int slab_memory_callback(struct notifier_block *self,
4378 : unsigned long action, void *arg)
4379 : {
4380 : int ret = 0;
4381 :
4382 : switch (action) {
4383 : case MEM_GOING_ONLINE:
4384 : ret = slab_mem_going_online_callback(arg);
4385 : break;
4386 : case MEM_GOING_OFFLINE:
4387 : ret = slab_mem_going_offline_callback(arg);
4388 : break;
4389 : case MEM_OFFLINE:
4390 : case MEM_CANCEL_ONLINE:
4391 : slab_mem_offline_callback(arg);
4392 : break;
4393 : case MEM_ONLINE:
4394 : case MEM_CANCEL_OFFLINE:
4395 : break;
4396 : }
4397 : if (ret)
4398 : ret = notifier_from_errno(ret);
4399 : else
4400 : ret = NOTIFY_OK;
4401 : return ret;
4402 : }
4403 :
4404 : static struct notifier_block slab_memory_callback_nb = {
4405 : .notifier_call = slab_memory_callback,
4406 : .priority = SLAB_CALLBACK_PRI,
4407 : };
4408 :
4409 : /********************************************************************
4410 : * Basic setup of slabs
4411 : *******************************************************************/
4412 :
4413 : /*
4414 : * Used for early kmem_cache structures that were allocated using
4415 : * the page allocator. Allocate them properly then fix up the pointers
4416 : * that may be pointing to the wrong kmem_cache structure.
4417 : */
4418 :
4419 2 : static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4420 : {
4421 2 : int node;
4422 2 : struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4423 2 : struct kmem_cache_node *n;
4424 :
4425 2 : memcpy(s, static_cache, kmem_cache->object_size);
4426 :
4427 : /*
4428 : * This runs very early, and only the boot processor is supposed to be
4429 : * up. Even if it weren't true, IRQs are not up so we couldn't fire
4430 : * IPIs around.
4431 : */
4432 2 : __flush_cpu_slab(s, smp_processor_id());
4433 6 : for_each_kmem_cache_node(s, node, n) {
4434 2 : struct page *p;
4435 :
4436 4 : list_for_each_entry(p, &n->partial, slab_list)
4437 2 : p->slab_cache = s;
4438 :
4439 : #ifdef CONFIG_SLUB_DEBUG
4440 2 : list_for_each_entry(p, &n->full, slab_list)
4441 0 : p->slab_cache = s;
4442 : #endif
4443 : }
4444 2 : list_add(&s->list, &slab_caches);
4445 2 : return s;
4446 : }
4447 :
4448 1 : void __init kmem_cache_init(void)
4449 : {
4450 1 : static __initdata struct kmem_cache boot_kmem_cache,
4451 : boot_kmem_cache_node;
4452 1 : int node;
4453 :
4454 1 : if (debug_guardpage_minorder())
4455 : slub_max_order = 0;
4456 :
4457 1 : kmem_cache_node = &boot_kmem_cache_node;
4458 1 : kmem_cache = &boot_kmem_cache;
4459 :
4460 : /*
4461 : * Initialize the nodemask for which we will allocate per node
4462 : * structures. Here we don't need taking slab_mutex yet.
4463 : */
4464 2 : for_each_node_state(node, N_NORMAL_MEMORY)
4465 1 : node_set(node, slab_nodes);
4466 :
4467 1 : create_boot_cache(kmem_cache_node, "kmem_cache_node",
4468 : sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4469 :
4470 1 : register_hotmemory_notifier(&slab_memory_callback_nb);
4471 :
4472 : /* Able to allocate the per node structures */
4473 1 : slab_state = PARTIAL;
4474 :
4475 1 : create_boot_cache(kmem_cache, "kmem_cache",
4476 : offsetof(struct kmem_cache, node) +
4477 : nr_node_ids * sizeof(struct kmem_cache_node *),
4478 : SLAB_HWCACHE_ALIGN, 0, 0);
4479 :
4480 1 : kmem_cache = bootstrap(&boot_kmem_cache);
4481 1 : kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4482 :
4483 : /* Now we can use the kmem_cache to allocate kmalloc slabs */
4484 1 : setup_kmalloc_cache_index_table();
4485 1 : create_kmalloc_caches(0);
4486 :
4487 : /* Setup random freelists for each cache */
4488 1 : init_freelist_randomization();
4489 :
4490 1 : cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4491 : slub_cpu_dead);
4492 :
4493 1 : pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4494 : cache_line_size(),
4495 : slub_min_order, slub_max_order, slub_min_objects,
4496 : nr_cpu_ids, nr_node_ids);
4497 1 : }
4498 :
4499 1 : void __init kmem_cache_init_late(void)
4500 : {
4501 1 : }
4502 :
4503 : struct kmem_cache *
4504 109 : __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4505 : slab_flags_t flags, void (*ctor)(void *))
4506 : {
4507 109 : struct kmem_cache *s;
4508 :
4509 109 : s = find_mergeable(size, align, flags, name, ctor);
4510 109 : if (s) {
4511 0 : s->refcount++;
4512 :
4513 : /*
4514 : * Adjust the object sizes so that we clear
4515 : * the complete object on kzalloc.
4516 : */
4517 0 : s->object_size = max(s->object_size, size);
4518 0 : s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4519 :
4520 0 : if (sysfs_slab_alias(s, name)) {
4521 0 : s->refcount--;
4522 0 : s = NULL;
4523 : }
4524 : }
4525 :
4526 109 : return s;
4527 : }
4528 :
4529 145 : int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4530 : {
4531 145 : int err;
4532 :
4533 145 : err = kmem_cache_open(s, flags);
4534 145 : if (err)
4535 : return err;
4536 :
4537 : /* Mutex is not taken during early boot */
4538 145 : if (slab_state <= UP)
4539 : return 0;
4540 :
4541 37 : err = sysfs_slab_add(s);
4542 37 : if (err)
4543 0 : __kmem_cache_release(s);
4544 :
4545 : return err;
4546 : }
4547 :
4548 8244 : void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4549 : {
4550 8244 : struct kmem_cache *s;
4551 8244 : void *ret;
4552 :
4553 8244 : if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4554 0 : return kmalloc_large(size, gfpflags);
4555 :
4556 8244 : s = kmalloc_slab(size, gfpflags);
4557 :
4558 8244 : if (unlikely(ZERO_OR_NULL_PTR(s)))
4559 : return s;
4560 :
4561 8244 : ret = slab_alloc(s, gfpflags, caller, size);
4562 :
4563 : /* Honor the call site pointer we received. */
4564 8244 : trace_kmalloc(caller, ret, size, s->size, gfpflags);
4565 :
4566 8244 : return ret;
4567 : }
4568 : EXPORT_SYMBOL(__kmalloc_track_caller);
4569 :
4570 : #ifdef CONFIG_NUMA
4571 5213 : void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4572 : int node, unsigned long caller)
4573 : {
4574 5213 : struct kmem_cache *s;
4575 5213 : void *ret;
4576 :
4577 5213 : if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4578 0 : ret = kmalloc_large_node(size, gfpflags, node);
4579 :
4580 0 : trace_kmalloc_node(caller, ret,
4581 0 : size, PAGE_SIZE << get_order(size),
4582 : gfpflags, node);
4583 :
4584 0 : return ret;
4585 : }
4586 :
4587 5213 : s = kmalloc_slab(size, gfpflags);
4588 :
4589 5213 : if (unlikely(ZERO_OR_NULL_PTR(s)))
4590 : return s;
4591 :
4592 5213 : ret = slab_alloc_node(s, gfpflags, node, caller, size);
4593 :
4594 : /* Honor the call site pointer we received. */
4595 5213 : trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4596 :
4597 5213 : return ret;
4598 : }
4599 : EXPORT_SYMBOL(__kmalloc_node_track_caller);
4600 : #endif
4601 :
4602 : #ifdef CONFIG_SYSFS
4603 0 : static int count_inuse(struct page *page)
4604 : {
4605 0 : return page->inuse;
4606 : }
4607 :
4608 0 : static int count_total(struct page *page)
4609 : {
4610 0 : return page->objects;
4611 : }
4612 : #endif
4613 :
4614 : #ifdef CONFIG_SLUB_DEBUG
4615 0 : static void validate_slab(struct kmem_cache *s, struct page *page)
4616 : {
4617 0 : void *p;
4618 0 : void *addr = page_address(page);
4619 0 : unsigned long *map;
4620 :
4621 0 : slab_lock(page);
4622 :
4623 0 : if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4624 0 : goto unlock;
4625 :
4626 : /* Now we know that a valid freelist exists */
4627 0 : map = get_map(s, page);
4628 0 : for_each_object(p, s, addr, page->objects) {
4629 0 : u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4630 : SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4631 :
4632 0 : if (!check_object(s, page, p, val))
4633 : break;
4634 : }
4635 0 : put_map(map);
4636 0 : unlock:
4637 0 : slab_unlock(page);
4638 0 : }
4639 :
4640 0 : static int validate_slab_node(struct kmem_cache *s,
4641 : struct kmem_cache_node *n)
4642 : {
4643 0 : unsigned long count = 0;
4644 0 : struct page *page;
4645 0 : unsigned long flags;
4646 :
4647 0 : spin_lock_irqsave(&n->list_lock, flags);
4648 :
4649 0 : list_for_each_entry(page, &n->partial, slab_list) {
4650 0 : validate_slab(s, page);
4651 0 : count++;
4652 : }
4653 0 : if (count != n->nr_partial)
4654 0 : pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4655 : s->name, count, n->nr_partial);
4656 :
4657 0 : if (!(s->flags & SLAB_STORE_USER))
4658 0 : goto out;
4659 :
4660 0 : list_for_each_entry(page, &n->full, slab_list) {
4661 0 : validate_slab(s, page);
4662 0 : count++;
4663 : }
4664 0 : if (count != atomic_long_read(&n->nr_slabs))
4665 0 : pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4666 : s->name, count, atomic_long_read(&n->nr_slabs));
4667 :
4668 0 : out:
4669 0 : spin_unlock_irqrestore(&n->list_lock, flags);
4670 0 : return count;
4671 : }
4672 :
4673 0 : static long validate_slab_cache(struct kmem_cache *s)
4674 : {
4675 0 : int node;
4676 0 : unsigned long count = 0;
4677 0 : struct kmem_cache_node *n;
4678 :
4679 0 : flush_all(s);
4680 0 : for_each_kmem_cache_node(s, node, n)
4681 0 : count += validate_slab_node(s, n);
4682 :
4683 0 : return count;
4684 : }
4685 : /*
4686 : * Generate lists of code addresses where slabcache objects are allocated
4687 : * and freed.
4688 : */
4689 :
4690 : struct location {
4691 : unsigned long count;
4692 : unsigned long addr;
4693 : long long sum_time;
4694 : long min_time;
4695 : long max_time;
4696 : long min_pid;
4697 : long max_pid;
4698 : DECLARE_BITMAP(cpus, NR_CPUS);
4699 : nodemask_t nodes;
4700 : };
4701 :
4702 : struct loc_track {
4703 : unsigned long max;
4704 : unsigned long count;
4705 : struct location *loc;
4706 : };
4707 :
4708 0 : static void free_loc_track(struct loc_track *t)
4709 : {
4710 0 : if (t->max)
4711 0 : free_pages((unsigned long)t->loc,
4712 0 : get_order(sizeof(struct location) * t->max));
4713 0 : }
4714 :
4715 0 : static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4716 : {
4717 0 : struct location *l;
4718 0 : int order;
4719 :
4720 0 : order = get_order(sizeof(struct location) * max);
4721 :
4722 0 : l = (void *)__get_free_pages(flags, order);
4723 0 : if (!l)
4724 : return 0;
4725 :
4726 0 : if (t->count) {
4727 0 : memcpy(l, t->loc, sizeof(struct location) * t->count);
4728 0 : free_loc_track(t);
4729 : }
4730 0 : t->max = max;
4731 0 : t->loc = l;
4732 0 : return 1;
4733 : }
4734 :
4735 0 : static int add_location(struct loc_track *t, struct kmem_cache *s,
4736 : const struct track *track)
4737 : {
4738 0 : long start, end, pos;
4739 0 : struct location *l;
4740 0 : unsigned long caddr;
4741 0 : unsigned long age = jiffies - track->when;
4742 :
4743 0 : start = -1;
4744 0 : end = t->count;
4745 :
4746 0 : for ( ; ; ) {
4747 0 : pos = start + (end - start + 1) / 2;
4748 :
4749 : /*
4750 : * There is nothing at "end". If we end up there
4751 : * we need to add something to before end.
4752 : */
4753 0 : if (pos == end)
4754 : break;
4755 :
4756 0 : caddr = t->loc[pos].addr;
4757 0 : if (track->addr == caddr) {
4758 :
4759 0 : l = &t->loc[pos];
4760 0 : l->count++;
4761 0 : if (track->when) {
4762 0 : l->sum_time += age;
4763 0 : if (age < l->min_time)
4764 0 : l->min_time = age;
4765 0 : if (age > l->max_time)
4766 0 : l->max_time = age;
4767 :
4768 0 : if (track->pid < l->min_pid)
4769 0 : l->min_pid = track->pid;
4770 0 : if (track->pid > l->max_pid)
4771 0 : l->max_pid = track->pid;
4772 :
4773 0 : cpumask_set_cpu(track->cpu,
4774 0 : to_cpumask(l->cpus));
4775 : }
4776 0 : node_set(page_to_nid(virt_to_page(track)), l->nodes);
4777 0 : return 1;
4778 : }
4779 :
4780 0 : if (track->addr < caddr)
4781 : end = pos;
4782 : else
4783 0 : start = pos;
4784 : }
4785 :
4786 : /*
4787 : * Not found. Insert new tracking element.
4788 : */
4789 0 : if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4790 : return 0;
4791 :
4792 0 : l = t->loc + pos;
4793 0 : if (pos < t->count)
4794 0 : memmove(l + 1, l,
4795 : (t->count - pos) * sizeof(struct location));
4796 0 : t->count++;
4797 0 : l->count = 1;
4798 0 : l->addr = track->addr;
4799 0 : l->sum_time = age;
4800 0 : l->min_time = age;
4801 0 : l->max_time = age;
4802 0 : l->min_pid = track->pid;
4803 0 : l->max_pid = track->pid;
4804 0 : cpumask_clear(to_cpumask(l->cpus));
4805 0 : cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4806 0 : nodes_clear(l->nodes);
4807 0 : node_set(page_to_nid(virt_to_page(track)), l->nodes);
4808 0 : return 1;
4809 : }
4810 :
4811 0 : static void process_slab(struct loc_track *t, struct kmem_cache *s,
4812 : struct page *page, enum track_item alloc)
4813 : {
4814 0 : void *addr = page_address(page);
4815 0 : void *p;
4816 0 : unsigned long *map;
4817 :
4818 0 : map = get_map(s, page);
4819 0 : for_each_object(p, s, addr, page->objects)
4820 0 : if (!test_bit(__obj_to_index(s, addr, p), map))
4821 0 : add_location(t, s, get_track(s, p, alloc));
4822 0 : put_map(map);
4823 0 : }
4824 :
4825 0 : static int list_locations(struct kmem_cache *s, char *buf,
4826 : enum track_item alloc)
4827 : {
4828 0 : int len = 0;
4829 0 : unsigned long i;
4830 0 : struct loc_track t = { 0, 0, NULL };
4831 0 : int node;
4832 0 : struct kmem_cache_node *n;
4833 :
4834 0 : if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4835 : GFP_KERNEL)) {
4836 0 : return sysfs_emit(buf, "Out of memory\n");
4837 : }
4838 : /* Push back cpu slabs */
4839 0 : flush_all(s);
4840 :
4841 0 : for_each_kmem_cache_node(s, node, n) {
4842 0 : unsigned long flags;
4843 0 : struct page *page;
4844 :
4845 0 : if (!atomic_long_read(&n->nr_slabs))
4846 0 : continue;
4847 :
4848 0 : spin_lock_irqsave(&n->list_lock, flags);
4849 0 : list_for_each_entry(page, &n->partial, slab_list)
4850 0 : process_slab(&t, s, page, alloc);
4851 0 : list_for_each_entry(page, &n->full, slab_list)
4852 0 : process_slab(&t, s, page, alloc);
4853 0 : spin_unlock_irqrestore(&n->list_lock, flags);
4854 : }
4855 :
4856 0 : for (i = 0; i < t.count; i++) {
4857 0 : struct location *l = &t.loc[i];
4858 :
4859 0 : len += sysfs_emit_at(buf, len, "%7ld ", l->count);
4860 :
4861 0 : if (l->addr)
4862 0 : len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
4863 : else
4864 0 : len += sysfs_emit_at(buf, len, "<not-available>");
4865 :
4866 0 : if (l->sum_time != l->min_time)
4867 0 : len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
4868 : l->min_time,
4869 0 : (long)div_u64(l->sum_time,
4870 0 : l->count),
4871 : l->max_time);
4872 : else
4873 0 : len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
4874 :
4875 0 : if (l->min_pid != l->max_pid)
4876 0 : len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
4877 : l->min_pid, l->max_pid);
4878 : else
4879 0 : len += sysfs_emit_at(buf, len, " pid=%ld",
4880 : l->min_pid);
4881 :
4882 0 : if (num_online_cpus() > 1 &&
4883 0 : !cpumask_empty(to_cpumask(l->cpus)))
4884 0 : len += sysfs_emit_at(buf, len, " cpus=%*pbl",
4885 0 : cpumask_pr_args(to_cpumask(l->cpus)));
4886 :
4887 0 : if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
4888 0 : len += sysfs_emit_at(buf, len, " nodes=%*pbl",
4889 0 : nodemask_pr_args(&l->nodes));
4890 :
4891 0 : len += sysfs_emit_at(buf, len, "\n");
4892 : }
4893 :
4894 0 : free_loc_track(&t);
4895 0 : if (!t.count)
4896 0 : len += sysfs_emit_at(buf, len, "No data\n");
4897 :
4898 : return len;
4899 : }
4900 : #endif /* CONFIG_SLUB_DEBUG */
4901 :
4902 : #ifdef SLUB_RESILIENCY_TEST
4903 : static void __init resiliency_test(void)
4904 : {
4905 : u8 *p;
4906 : int type = KMALLOC_NORMAL;
4907 :
4908 : BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4909 :
4910 : pr_err("SLUB resiliency testing\n");
4911 : pr_err("-----------------------\n");
4912 : pr_err("A. Corruption after allocation\n");
4913 :
4914 : p = kzalloc(16, GFP_KERNEL);
4915 : p[16] = 0x12;
4916 : pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4917 : p + 16);
4918 :
4919 : validate_slab_cache(kmalloc_caches[type][4]);
4920 :
4921 : /* Hmmm... The next two are dangerous */
4922 : p = kzalloc(32, GFP_KERNEL);
4923 : p[32 + sizeof(void *)] = 0x34;
4924 : pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4925 : p);
4926 : pr_err("If allocated object is overwritten then not detectable\n\n");
4927 :
4928 : validate_slab_cache(kmalloc_caches[type][5]);
4929 : p = kzalloc(64, GFP_KERNEL);
4930 : p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4931 : *p = 0x56;
4932 : pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4933 : p);
4934 : pr_err("If allocated object is overwritten then not detectable\n\n");
4935 : validate_slab_cache(kmalloc_caches[type][6]);
4936 :
4937 : pr_err("\nB. Corruption after free\n");
4938 : p = kzalloc(128, GFP_KERNEL);
4939 : kfree(p);
4940 : *p = 0x78;
4941 : pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4942 : validate_slab_cache(kmalloc_caches[type][7]);
4943 :
4944 : p = kzalloc(256, GFP_KERNEL);
4945 : kfree(p);
4946 : p[50] = 0x9a;
4947 : pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4948 : validate_slab_cache(kmalloc_caches[type][8]);
4949 :
4950 : p = kzalloc(512, GFP_KERNEL);
4951 : kfree(p);
4952 : p[512] = 0xab;
4953 : pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4954 : validate_slab_cache(kmalloc_caches[type][9]);
4955 : }
4956 : #else
4957 : #ifdef CONFIG_SYSFS
4958 1 : static void resiliency_test(void) {};
4959 : #endif
4960 : #endif /* SLUB_RESILIENCY_TEST */
4961 :
4962 : #ifdef CONFIG_SYSFS
4963 : enum slab_stat_type {
4964 : SL_ALL, /* All slabs */
4965 : SL_PARTIAL, /* Only partially allocated slabs */
4966 : SL_CPU, /* Only slabs used for cpu caches */
4967 : SL_OBJECTS, /* Determine allocated objects not slabs */
4968 : SL_TOTAL /* Determine object capacity not slabs */
4969 : };
4970 :
4971 : #define SO_ALL (1 << SL_ALL)
4972 : #define SO_PARTIAL (1 << SL_PARTIAL)
4973 : #define SO_CPU (1 << SL_CPU)
4974 : #define SO_OBJECTS (1 << SL_OBJECTS)
4975 : #define SO_TOTAL (1 << SL_TOTAL)
4976 :
4977 0 : static ssize_t show_slab_objects(struct kmem_cache *s,
4978 : char *buf, unsigned long flags)
4979 : {
4980 0 : unsigned long total = 0;
4981 0 : int node;
4982 0 : int x;
4983 0 : unsigned long *nodes;
4984 0 : int len = 0;
4985 :
4986 0 : nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4987 0 : if (!nodes)
4988 : return -ENOMEM;
4989 :
4990 0 : if (flags & SO_CPU) {
4991 : int cpu;
4992 :
4993 0 : for_each_possible_cpu(cpu) {
4994 0 : struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4995 : cpu);
4996 0 : int node;
4997 0 : struct page *page;
4998 :
4999 0 : page = READ_ONCE(c->page);
5000 0 : if (!page)
5001 0 : continue;
5002 :
5003 0 : node = page_to_nid(page);
5004 0 : if (flags & SO_TOTAL)
5005 0 : x = page->objects;
5006 0 : else if (flags & SO_OBJECTS)
5007 0 : x = page->inuse;
5008 : else
5009 : x = 1;
5010 :
5011 0 : total += x;
5012 0 : nodes[node] += x;
5013 :
5014 0 : page = slub_percpu_partial_read_once(c);
5015 0 : if (page) {
5016 : node = page_to_nid(page);
5017 : if (flags & SO_TOTAL)
5018 : WARN_ON_ONCE(1);
5019 : else if (flags & SO_OBJECTS)
5020 : WARN_ON_ONCE(1);
5021 : else
5022 : x = page->pages;
5023 : total += x;
5024 : nodes[node] += x;
5025 : }
5026 : }
5027 : }
5028 :
5029 : /*
5030 : * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
5031 : * already held which will conflict with an existing lock order:
5032 : *
5033 : * mem_hotplug_lock->slab_mutex->kernfs_mutex
5034 : *
5035 : * We don't really need mem_hotplug_lock (to hold off
5036 : * slab_mem_going_offline_callback) here because slab's memory hot
5037 : * unplug code doesn't destroy the kmem_cache->node[] data.
5038 : */
5039 :
5040 : #ifdef CONFIG_SLUB_DEBUG
5041 0 : if (flags & SO_ALL) {
5042 : struct kmem_cache_node *n;
5043 :
5044 0 : for_each_kmem_cache_node(s, node, n) {
5045 :
5046 0 : if (flags & SO_TOTAL)
5047 0 : x = atomic_long_read(&n->total_objects);
5048 0 : else if (flags & SO_OBJECTS)
5049 0 : x = atomic_long_read(&n->total_objects) -
5050 0 : count_partial(n, count_free);
5051 : else
5052 0 : x = atomic_long_read(&n->nr_slabs);
5053 0 : total += x;
5054 0 : nodes[node] += x;
5055 : }
5056 :
5057 : } else
5058 : #endif
5059 0 : if (flags & SO_PARTIAL) {
5060 : struct kmem_cache_node *n;
5061 :
5062 0 : for_each_kmem_cache_node(s, node, n) {
5063 0 : if (flags & SO_TOTAL)
5064 0 : x = count_partial(n, count_total);
5065 0 : else if (flags & SO_OBJECTS)
5066 0 : x = count_partial(n, count_inuse);
5067 : else
5068 0 : x = n->nr_partial;
5069 0 : total += x;
5070 0 : nodes[node] += x;
5071 : }
5072 : }
5073 :
5074 0 : len += sysfs_emit_at(buf, len, "%lu", total);
5075 : #ifdef CONFIG_NUMA
5076 0 : for (node = 0; node < nr_node_ids; node++) {
5077 0 : if (nodes[node])
5078 0 : len += sysfs_emit_at(buf, len, " N%d=%lu",
5079 : node, nodes[node]);
5080 : }
5081 : #endif
5082 0 : len += sysfs_emit_at(buf, len, "\n");
5083 0 : kfree(nodes);
5084 :
5085 0 : return len;
5086 : }
5087 :
5088 : #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5089 : #define to_slab(n) container_of(n, struct kmem_cache, kobj)
5090 :
5091 : struct slab_attribute {
5092 : struct attribute attr;
5093 : ssize_t (*show)(struct kmem_cache *s, char *buf);
5094 : ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5095 : };
5096 :
5097 : #define SLAB_ATTR_RO(_name) \
5098 : static struct slab_attribute _name##_attr = \
5099 : __ATTR(_name, 0400, _name##_show, NULL)
5100 :
5101 : #define SLAB_ATTR(_name) \
5102 : static struct slab_attribute _name##_attr = \
5103 : __ATTR(_name, 0600, _name##_show, _name##_store)
5104 :
5105 0 : static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5106 : {
5107 0 : return sysfs_emit(buf, "%u\n", s->size);
5108 : }
5109 : SLAB_ATTR_RO(slab_size);
5110 :
5111 0 : static ssize_t align_show(struct kmem_cache *s, char *buf)
5112 : {
5113 0 : return sysfs_emit(buf, "%u\n", s->align);
5114 : }
5115 : SLAB_ATTR_RO(align);
5116 :
5117 0 : static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5118 : {
5119 0 : return sysfs_emit(buf, "%u\n", s->object_size);
5120 : }
5121 : SLAB_ATTR_RO(object_size);
5122 :
5123 0 : static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5124 : {
5125 0 : return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5126 : }
5127 : SLAB_ATTR_RO(objs_per_slab);
5128 :
5129 0 : static ssize_t order_show(struct kmem_cache *s, char *buf)
5130 : {
5131 0 : return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5132 : }
5133 : SLAB_ATTR_RO(order);
5134 :
5135 0 : static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5136 : {
5137 0 : return sysfs_emit(buf, "%lu\n", s->min_partial);
5138 : }
5139 :
5140 0 : static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5141 : size_t length)
5142 : {
5143 0 : unsigned long min;
5144 0 : int err;
5145 :
5146 0 : err = kstrtoul(buf, 10, &min);
5147 0 : if (err)
5148 0 : return err;
5149 :
5150 0 : set_min_partial(s, min);
5151 0 : return length;
5152 : }
5153 : SLAB_ATTR(min_partial);
5154 :
5155 0 : static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5156 : {
5157 0 : return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
5158 : }
5159 :
5160 0 : static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5161 : size_t length)
5162 : {
5163 0 : unsigned int objects;
5164 0 : int err;
5165 :
5166 0 : err = kstrtouint(buf, 10, &objects);
5167 0 : if (err)
5168 0 : return err;
5169 0 : if (objects && !kmem_cache_has_cpu_partial(s))
5170 : return -EINVAL;
5171 :
5172 0 : slub_set_cpu_partial(s, objects);
5173 0 : flush_all(s);
5174 0 : return length;
5175 : }
5176 : SLAB_ATTR(cpu_partial);
5177 :
5178 0 : static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5179 : {
5180 0 : if (!s->ctor)
5181 : return 0;
5182 0 : return sysfs_emit(buf, "%pS\n", s->ctor);
5183 : }
5184 : SLAB_ATTR_RO(ctor);
5185 :
5186 0 : static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5187 : {
5188 0 : return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5189 : }
5190 : SLAB_ATTR_RO(aliases);
5191 :
5192 0 : static ssize_t partial_show(struct kmem_cache *s, char *buf)
5193 : {
5194 0 : return show_slab_objects(s, buf, SO_PARTIAL);
5195 : }
5196 : SLAB_ATTR_RO(partial);
5197 :
5198 0 : static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5199 : {
5200 0 : return show_slab_objects(s, buf, SO_CPU);
5201 : }
5202 : SLAB_ATTR_RO(cpu_slabs);
5203 :
5204 0 : static ssize_t objects_show(struct kmem_cache *s, char *buf)
5205 : {
5206 0 : return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5207 : }
5208 : SLAB_ATTR_RO(objects);
5209 :
5210 0 : static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5211 : {
5212 0 : return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5213 : }
5214 : SLAB_ATTR_RO(objects_partial);
5215 :
5216 0 : static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5217 : {
5218 0 : int objects = 0;
5219 0 : int pages = 0;
5220 0 : int cpu;
5221 0 : int len = 0;
5222 :
5223 0 : for_each_online_cpu(cpu) {
5224 : struct page *page;
5225 :
5226 0 : page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5227 :
5228 : if (page) {
5229 : pages += page->pages;
5230 : objects += page->pobjects;
5231 : }
5232 : }
5233 :
5234 0 : len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
5235 :
5236 : #ifdef CONFIG_SMP
5237 0 : for_each_online_cpu(cpu) {
5238 : struct page *page;
5239 :
5240 0 : page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5241 : if (page)
5242 : len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5243 : cpu, page->pobjects, page->pages);
5244 : }
5245 : #endif
5246 0 : len += sysfs_emit_at(buf, len, "\n");
5247 :
5248 0 : return len;
5249 : }
5250 : SLAB_ATTR_RO(slabs_cpu_partial);
5251 :
5252 0 : static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5253 : {
5254 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5255 : }
5256 : SLAB_ATTR_RO(reclaim_account);
5257 :
5258 0 : static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5259 : {
5260 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5261 : }
5262 : SLAB_ATTR_RO(hwcache_align);
5263 :
5264 : #ifdef CONFIG_ZONE_DMA
5265 : static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5266 : {
5267 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5268 : }
5269 : SLAB_ATTR_RO(cache_dma);
5270 : #endif
5271 :
5272 0 : static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5273 : {
5274 0 : return sysfs_emit(buf, "%u\n", s->usersize);
5275 : }
5276 : SLAB_ATTR_RO(usersize);
5277 :
5278 0 : static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5279 : {
5280 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5281 : }
5282 : SLAB_ATTR_RO(destroy_by_rcu);
5283 :
5284 : #ifdef CONFIG_SLUB_DEBUG
5285 0 : static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5286 : {
5287 0 : return show_slab_objects(s, buf, SO_ALL);
5288 : }
5289 : SLAB_ATTR_RO(slabs);
5290 :
5291 0 : static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5292 : {
5293 0 : return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5294 : }
5295 : SLAB_ATTR_RO(total_objects);
5296 :
5297 0 : static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5298 : {
5299 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5300 : }
5301 : SLAB_ATTR_RO(sanity_checks);
5302 :
5303 0 : static ssize_t trace_show(struct kmem_cache *s, char *buf)
5304 : {
5305 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5306 : }
5307 : SLAB_ATTR_RO(trace);
5308 :
5309 0 : static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5310 : {
5311 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5312 : }
5313 :
5314 : SLAB_ATTR_RO(red_zone);
5315 :
5316 0 : static ssize_t poison_show(struct kmem_cache *s, char *buf)
5317 : {
5318 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
5319 : }
5320 :
5321 : SLAB_ATTR_RO(poison);
5322 :
5323 0 : static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5324 : {
5325 0 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5326 : }
5327 :
5328 : SLAB_ATTR_RO(store_user);
5329 :
5330 0 : static ssize_t validate_show(struct kmem_cache *s, char *buf)
5331 : {
5332 0 : return 0;
5333 : }
5334 :
5335 0 : static ssize_t validate_store(struct kmem_cache *s,
5336 : const char *buf, size_t length)
5337 : {
5338 0 : int ret = -EINVAL;
5339 :
5340 0 : if (buf[0] == '1') {
5341 0 : ret = validate_slab_cache(s);
5342 0 : if (ret >= 0)
5343 0 : ret = length;
5344 : }
5345 0 : return ret;
5346 : }
5347 : SLAB_ATTR(validate);
5348 :
5349 0 : static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5350 : {
5351 0 : if (!(s->flags & SLAB_STORE_USER))
5352 : return -ENOSYS;
5353 0 : return list_locations(s, buf, TRACK_ALLOC);
5354 : }
5355 : SLAB_ATTR_RO(alloc_calls);
5356 :
5357 0 : static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5358 : {
5359 0 : if (!(s->flags & SLAB_STORE_USER))
5360 : return -ENOSYS;
5361 0 : return list_locations(s, buf, TRACK_FREE);
5362 : }
5363 : SLAB_ATTR_RO(free_calls);
5364 : #endif /* CONFIG_SLUB_DEBUG */
5365 :
5366 : #ifdef CONFIG_FAILSLAB
5367 : static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5368 : {
5369 : return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5370 : }
5371 : SLAB_ATTR_RO(failslab);
5372 : #endif
5373 :
5374 0 : static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5375 : {
5376 0 : return 0;
5377 : }
5378 :
5379 0 : static ssize_t shrink_store(struct kmem_cache *s,
5380 : const char *buf, size_t length)
5381 : {
5382 0 : if (buf[0] == '1')
5383 0 : kmem_cache_shrink(s);
5384 : else
5385 : return -EINVAL;
5386 0 : return length;
5387 : }
5388 : SLAB_ATTR(shrink);
5389 :
5390 : #ifdef CONFIG_NUMA
5391 0 : static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5392 : {
5393 0 : return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5394 : }
5395 :
5396 0 : static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5397 : const char *buf, size_t length)
5398 : {
5399 0 : unsigned int ratio;
5400 0 : int err;
5401 :
5402 0 : err = kstrtouint(buf, 10, &ratio);
5403 0 : if (err)
5404 0 : return err;
5405 0 : if (ratio > 100)
5406 : return -ERANGE;
5407 :
5408 0 : s->remote_node_defrag_ratio = ratio * 10;
5409 :
5410 0 : return length;
5411 : }
5412 : SLAB_ATTR(remote_node_defrag_ratio);
5413 : #endif
5414 :
5415 : #ifdef CONFIG_SLUB_STATS
5416 : static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5417 : {
5418 : unsigned long sum = 0;
5419 : int cpu;
5420 : int len = 0;
5421 : int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5422 :
5423 : if (!data)
5424 : return -ENOMEM;
5425 :
5426 : for_each_online_cpu(cpu) {
5427 : unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5428 :
5429 : data[cpu] = x;
5430 : sum += x;
5431 : }
5432 :
5433 : len += sysfs_emit_at(buf, len, "%lu", sum);
5434 :
5435 : #ifdef CONFIG_SMP
5436 : for_each_online_cpu(cpu) {
5437 : if (data[cpu])
5438 : len += sysfs_emit_at(buf, len, " C%d=%u",
5439 : cpu, data[cpu]);
5440 : }
5441 : #endif
5442 : kfree(data);
5443 : len += sysfs_emit_at(buf, len, "\n");
5444 :
5445 : return len;
5446 : }
5447 :
5448 : static void clear_stat(struct kmem_cache *s, enum stat_item si)
5449 : {
5450 : int cpu;
5451 :
5452 : for_each_online_cpu(cpu)
5453 : per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5454 : }
5455 :
5456 : #define STAT_ATTR(si, text) \
5457 : static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5458 : { \
5459 : return show_stat(s, buf, si); \
5460 : } \
5461 : static ssize_t text##_store(struct kmem_cache *s, \
5462 : const char *buf, size_t length) \
5463 : { \
5464 : if (buf[0] != '0') \
5465 : return -EINVAL; \
5466 : clear_stat(s, si); \
5467 : return length; \
5468 : } \
5469 : SLAB_ATTR(text); \
5470 :
5471 : STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5472 : STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5473 : STAT_ATTR(FREE_FASTPATH, free_fastpath);
5474 : STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5475 : STAT_ATTR(FREE_FROZEN, free_frozen);
5476 : STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5477 : STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5478 : STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5479 : STAT_ATTR(ALLOC_SLAB, alloc_slab);
5480 : STAT_ATTR(ALLOC_REFILL, alloc_refill);
5481 : STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5482 : STAT_ATTR(FREE_SLAB, free_slab);
5483 : STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5484 : STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5485 : STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5486 : STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5487 : STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5488 : STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5489 : STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5490 : STAT_ATTR(ORDER_FALLBACK, order_fallback);
5491 : STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5492 : STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5493 : STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5494 : STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5495 : STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5496 : STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5497 : #endif /* CONFIG_SLUB_STATS */
5498 :
5499 : static struct attribute *slab_attrs[] = {
5500 : &slab_size_attr.attr,
5501 : &object_size_attr.attr,
5502 : &objs_per_slab_attr.attr,
5503 : &order_attr.attr,
5504 : &min_partial_attr.attr,
5505 : &cpu_partial_attr.attr,
5506 : &objects_attr.attr,
5507 : &objects_partial_attr.attr,
5508 : &partial_attr.attr,
5509 : &cpu_slabs_attr.attr,
5510 : &ctor_attr.attr,
5511 : &aliases_attr.attr,
5512 : &align_attr.attr,
5513 : &hwcache_align_attr.attr,
5514 : &reclaim_account_attr.attr,
5515 : &destroy_by_rcu_attr.attr,
5516 : &shrink_attr.attr,
5517 : &slabs_cpu_partial_attr.attr,
5518 : #ifdef CONFIG_SLUB_DEBUG
5519 : &total_objects_attr.attr,
5520 : &slabs_attr.attr,
5521 : &sanity_checks_attr.attr,
5522 : &trace_attr.attr,
5523 : &red_zone_attr.attr,
5524 : &poison_attr.attr,
5525 : &store_user_attr.attr,
5526 : &validate_attr.attr,
5527 : &alloc_calls_attr.attr,
5528 : &free_calls_attr.attr,
5529 : #endif
5530 : #ifdef CONFIG_ZONE_DMA
5531 : &cache_dma_attr.attr,
5532 : #endif
5533 : #ifdef CONFIG_NUMA
5534 : &remote_node_defrag_ratio_attr.attr,
5535 : #endif
5536 : #ifdef CONFIG_SLUB_STATS
5537 : &alloc_fastpath_attr.attr,
5538 : &alloc_slowpath_attr.attr,
5539 : &free_fastpath_attr.attr,
5540 : &free_slowpath_attr.attr,
5541 : &free_frozen_attr.attr,
5542 : &free_add_partial_attr.attr,
5543 : &free_remove_partial_attr.attr,
5544 : &alloc_from_partial_attr.attr,
5545 : &alloc_slab_attr.attr,
5546 : &alloc_refill_attr.attr,
5547 : &alloc_node_mismatch_attr.attr,
5548 : &free_slab_attr.attr,
5549 : &cpuslab_flush_attr.attr,
5550 : &deactivate_full_attr.attr,
5551 : &deactivate_empty_attr.attr,
5552 : &deactivate_to_head_attr.attr,
5553 : &deactivate_to_tail_attr.attr,
5554 : &deactivate_remote_frees_attr.attr,
5555 : &deactivate_bypass_attr.attr,
5556 : &order_fallback_attr.attr,
5557 : &cmpxchg_double_fail_attr.attr,
5558 : &cmpxchg_double_cpu_fail_attr.attr,
5559 : &cpu_partial_alloc_attr.attr,
5560 : &cpu_partial_free_attr.attr,
5561 : &cpu_partial_node_attr.attr,
5562 : &cpu_partial_drain_attr.attr,
5563 : #endif
5564 : #ifdef CONFIG_FAILSLAB
5565 : &failslab_attr.attr,
5566 : #endif
5567 : &usersize_attr.attr,
5568 :
5569 : NULL
5570 : };
5571 :
5572 : static const struct attribute_group slab_attr_group = {
5573 : .attrs = slab_attrs,
5574 : };
5575 :
5576 0 : static ssize_t slab_attr_show(struct kobject *kobj,
5577 : struct attribute *attr,
5578 : char *buf)
5579 : {
5580 0 : struct slab_attribute *attribute;
5581 0 : struct kmem_cache *s;
5582 0 : int err;
5583 :
5584 0 : attribute = to_slab_attr(attr);
5585 0 : s = to_slab(kobj);
5586 :
5587 0 : if (!attribute->show)
5588 : return -EIO;
5589 :
5590 0 : err = attribute->show(s, buf);
5591 :
5592 0 : return err;
5593 : }
5594 :
5595 0 : static ssize_t slab_attr_store(struct kobject *kobj,
5596 : struct attribute *attr,
5597 : const char *buf, size_t len)
5598 : {
5599 0 : struct slab_attribute *attribute;
5600 0 : struct kmem_cache *s;
5601 0 : int err;
5602 :
5603 0 : attribute = to_slab_attr(attr);
5604 0 : s = to_slab(kobj);
5605 :
5606 0 : if (!attribute->store)
5607 : return -EIO;
5608 :
5609 0 : err = attribute->store(s, buf, len);
5610 0 : return err;
5611 : }
5612 :
5613 0 : static void kmem_cache_release(struct kobject *k)
5614 : {
5615 0 : slab_kmem_cache_release(to_slab(k));
5616 0 : }
5617 :
5618 : static const struct sysfs_ops slab_sysfs_ops = {
5619 : .show = slab_attr_show,
5620 : .store = slab_attr_store,
5621 : };
5622 :
5623 : static struct kobj_type slab_ktype = {
5624 : .sysfs_ops = &slab_sysfs_ops,
5625 : .release = kmem_cache_release,
5626 : };
5627 :
5628 : static struct kset *slab_kset;
5629 :
5630 145 : static inline struct kset *cache_kset(struct kmem_cache *s)
5631 : {
5632 145 : return slab_kset;
5633 : }
5634 :
5635 : #define ID_STR_LENGTH 64
5636 :
5637 : /* Create a unique string id for a slab cache:
5638 : *
5639 : * Format :[flags-]size
5640 : */
5641 0 : static char *create_unique_id(struct kmem_cache *s)
5642 : {
5643 0 : char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5644 0 : char *p = name;
5645 :
5646 0 : BUG_ON(!name);
5647 :
5648 0 : *p++ = ':';
5649 : /*
5650 : * First flags affecting slabcache operations. We will only
5651 : * get here for aliasable slabs so we do not need to support
5652 : * too many flags. The flags here must cover all flags that
5653 : * are matched during merging to guarantee that the id is
5654 : * unique.
5655 : */
5656 0 : if (s->flags & SLAB_CACHE_DMA)
5657 0 : *p++ = 'd';
5658 0 : if (s->flags & SLAB_CACHE_DMA32)
5659 0 : *p++ = 'D';
5660 0 : if (s->flags & SLAB_RECLAIM_ACCOUNT)
5661 0 : *p++ = 'a';
5662 0 : if (s->flags & SLAB_CONSISTENCY_CHECKS)
5663 0 : *p++ = 'F';
5664 0 : if (s->flags & SLAB_ACCOUNT)
5665 : *p++ = 'A';
5666 0 : if (p != name + 1)
5667 0 : *p++ = '-';
5668 0 : p += sprintf(p, "%07u", s->size);
5669 :
5670 0 : BUG_ON(p > name + ID_STR_LENGTH - 1);
5671 0 : return name;
5672 : }
5673 :
5674 145 : static int sysfs_slab_add(struct kmem_cache *s)
5675 : {
5676 145 : int err;
5677 145 : const char *name;
5678 145 : struct kset *kset = cache_kset(s);
5679 145 : int unmergeable = slab_unmergeable(s);
5680 :
5681 145 : if (!kset) {
5682 0 : kobject_init(&s->kobj, &slab_ktype);
5683 0 : return 0;
5684 : }
5685 :
5686 145 : if (!unmergeable && disable_higher_order_debug &&
5687 0 : (slub_debug & DEBUG_METADATA_FLAGS))
5688 : unmergeable = 1;
5689 :
5690 145 : if (unmergeable) {
5691 : /*
5692 : * Slabcache can never be merged so we can use the name proper.
5693 : * This is typically the case for debug situations. In that
5694 : * case we can catch duplicate names easily.
5695 : */
5696 145 : sysfs_remove_link(&slab_kset->kobj, s->name);
5697 145 : name = s->name;
5698 : } else {
5699 : /*
5700 : * Create a unique name for the slab as a target
5701 : * for the symlinks.
5702 : */
5703 0 : name = create_unique_id(s);
5704 : }
5705 :
5706 145 : s->kobj.kset = kset;
5707 145 : err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5708 145 : if (err)
5709 0 : goto out;
5710 :
5711 145 : err = sysfs_create_group(&s->kobj, &slab_attr_group);
5712 145 : if (err)
5713 0 : goto out_del_kobj;
5714 :
5715 145 : if (!unmergeable) {
5716 : /* Setup first alias */
5717 0 : sysfs_slab_alias(s, s->name);
5718 : }
5719 145 : out:
5720 145 : if (!unmergeable)
5721 0 : kfree(name);
5722 : return err;
5723 0 : out_del_kobj:
5724 0 : kobject_del(&s->kobj);
5725 0 : goto out;
5726 : }
5727 :
5728 0 : void sysfs_slab_unlink(struct kmem_cache *s)
5729 : {
5730 0 : if (slab_state >= FULL)
5731 0 : kobject_del(&s->kobj);
5732 0 : }
5733 :
5734 0 : void sysfs_slab_release(struct kmem_cache *s)
5735 : {
5736 0 : if (slab_state >= FULL)
5737 0 : kobject_put(&s->kobj);
5738 0 : }
5739 :
5740 : /*
5741 : * Need to buffer aliases during bootup until sysfs becomes
5742 : * available lest we lose that information.
5743 : */
5744 : struct saved_alias {
5745 : struct kmem_cache *s;
5746 : const char *name;
5747 : struct saved_alias *next;
5748 : };
5749 :
5750 : static struct saved_alias *alias_list;
5751 :
5752 0 : static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5753 : {
5754 0 : struct saved_alias *al;
5755 :
5756 0 : if (slab_state == FULL) {
5757 : /*
5758 : * If we have a leftover link then remove it.
5759 : */
5760 0 : sysfs_remove_link(&slab_kset->kobj, name);
5761 0 : return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5762 : }
5763 :
5764 0 : al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5765 0 : if (!al)
5766 : return -ENOMEM;
5767 :
5768 0 : al->s = s;
5769 0 : al->name = name;
5770 0 : al->next = alias_list;
5771 0 : alias_list = al;
5772 0 : return 0;
5773 : }
5774 :
5775 1 : static int __init slab_sysfs_init(void)
5776 : {
5777 1 : struct kmem_cache *s;
5778 1 : int err;
5779 :
5780 1 : mutex_lock(&slab_mutex);
5781 :
5782 1 : slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
5783 1 : if (!slab_kset) {
5784 0 : mutex_unlock(&slab_mutex);
5785 0 : pr_err("Cannot register slab subsystem.\n");
5786 0 : return -ENOSYS;
5787 : }
5788 :
5789 1 : slab_state = FULL;
5790 :
5791 109 : list_for_each_entry(s, &slab_caches, list) {
5792 108 : err = sysfs_slab_add(s);
5793 108 : if (err)
5794 0 : pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5795 : s->name);
5796 : }
5797 :
5798 1 : while (alias_list) {
5799 0 : struct saved_alias *al = alias_list;
5800 :
5801 0 : alias_list = alias_list->next;
5802 0 : err = sysfs_slab_alias(al->s, al->name);
5803 0 : if (err)
5804 0 : pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5805 : al->name);
5806 0 : kfree(al);
5807 : }
5808 :
5809 1 : mutex_unlock(&slab_mutex);
5810 1 : resiliency_test();
5811 1 : return 0;
5812 : }
5813 :
5814 : __initcall(slab_sysfs_init);
5815 : #endif /* CONFIG_SYSFS */
5816 :
5817 : /*
5818 : * The /proc/slabinfo ABI
5819 : */
5820 : #ifdef CONFIG_SLUB_DEBUG
5821 0 : void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5822 : {
5823 0 : unsigned long nr_slabs = 0;
5824 0 : unsigned long nr_objs = 0;
5825 0 : unsigned long nr_free = 0;
5826 0 : int node;
5827 0 : struct kmem_cache_node *n;
5828 :
5829 0 : for_each_kmem_cache_node(s, node, n) {
5830 0 : nr_slabs += node_nr_slabs(n);
5831 0 : nr_objs += node_nr_objs(n);
5832 0 : nr_free += count_partial(n, count_free);
5833 : }
5834 :
5835 0 : sinfo->active_objs = nr_objs - nr_free;
5836 0 : sinfo->num_objs = nr_objs;
5837 0 : sinfo->active_slabs = nr_slabs;
5838 0 : sinfo->num_slabs = nr_slabs;
5839 0 : sinfo->objects_per_slab = oo_objects(s->oo);
5840 0 : sinfo->cache_order = oo_order(s->oo);
5841 0 : }
5842 :
5843 0 : void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5844 : {
5845 0 : }
5846 :
5847 0 : ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5848 : size_t count, loff_t *ppos)
5849 : {
5850 0 : return -EIO;
5851 : }
5852 : #endif /* CONFIG_SLUB_DEBUG */
|