Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
4 : * Authors: David Chinner and Glauber Costa
5 : *
6 : * Generic LRU infrastructure
7 : */
8 : #include <linux/kernel.h>
9 : #include <linux/module.h>
10 : #include <linux/mm.h>
11 : #include <linux/list_lru.h>
12 : #include <linux/slab.h>
13 : #include <linux/mutex.h>
14 : #include <linux/memcontrol.h>
15 : #include "slab.h"
16 :
17 : #ifdef CONFIG_MEMCG_KMEM
18 : static LIST_HEAD(list_lrus);
19 : static DEFINE_MUTEX(list_lrus_mutex);
20 :
21 : static void list_lru_register(struct list_lru *lru)
22 : {
23 : mutex_lock(&list_lrus_mutex);
24 : list_add(&lru->list, &list_lrus);
25 : mutex_unlock(&list_lrus_mutex);
26 : }
27 :
28 : static void list_lru_unregister(struct list_lru *lru)
29 : {
30 : mutex_lock(&list_lrus_mutex);
31 : list_del(&lru->list);
32 : mutex_unlock(&list_lrus_mutex);
33 : }
34 :
35 : static int lru_shrinker_id(struct list_lru *lru)
36 : {
37 : return lru->shrinker_id;
38 : }
39 :
40 : static inline bool list_lru_memcg_aware(struct list_lru *lru)
41 : {
42 : return lru->memcg_aware;
43 : }
44 :
45 : static inline struct list_lru_one *
46 : list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
47 : {
48 : struct list_lru_memcg *memcg_lrus;
49 : /*
50 : * Either lock or RCU protects the array of per-cgroup lists
51 : * from relocation (see memcg_update_list_lru_node).
52 : */
53 : memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
54 : lockdep_is_held(&nlru->lock));
55 : if (memcg_lrus && idx >= 0)
56 : return memcg_lrus->lru[idx];
57 : return &nlru->lru;
58 : }
59 :
60 : static inline struct list_lru_one *
61 : list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
62 : struct mem_cgroup **memcg_ptr)
63 : {
64 : struct list_lru_one *l = &nlru->lru;
65 : struct mem_cgroup *memcg = NULL;
66 :
67 : if (!nlru->memcg_lrus)
68 : goto out;
69 :
70 : memcg = mem_cgroup_from_obj(ptr);
71 : if (!memcg)
72 : goto out;
73 :
74 : l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
75 : out:
76 : if (memcg_ptr)
77 : *memcg_ptr = memcg;
78 : return l;
79 : }
80 : #else
81 : static void list_lru_register(struct list_lru *lru)
82 : {
83 : }
84 :
85 198 : static void list_lru_unregister(struct list_lru *lru)
86 : {
87 198 : }
88 :
89 : static int lru_shrinker_id(struct list_lru *lru)
90 : {
91 : return -1;
92 : }
93 :
94 : static inline bool list_lru_memcg_aware(struct list_lru *lru)
95 : {
96 : return false;
97 : }
98 :
99 : static inline struct list_lru_one *
100 3 : list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
101 : {
102 3 : return &nlru->lru;
103 : }
104 :
105 : static inline struct list_lru_one *
106 13877 : list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
107 : struct mem_cgroup **memcg_ptr)
108 : {
109 13877 : if (memcg_ptr)
110 11268 : *memcg_ptr = NULL;
111 13877 : return &nlru->lru;
112 : }
113 : #endif /* CONFIG_MEMCG_KMEM */
114 :
115 11268 : bool list_lru_add(struct list_lru *lru, struct list_head *item)
116 : {
117 11268 : int nid = page_to_nid(virt_to_page(item));
118 11268 : struct list_lru_node *nlru = &lru->node[nid];
119 11268 : struct mem_cgroup *memcg;
120 11268 : struct list_lru_one *l;
121 :
122 11268 : spin_lock(&nlru->lock);
123 11268 : if (list_empty(item)) {
124 11268 : l = list_lru_from_kmem(nlru, item, &memcg);
125 11268 : list_add_tail(item, &l->list);
126 : /* Set shrinker bit if the first element was added */
127 11268 : if (!l->nr_items++)
128 11268 : memcg_set_shrinker_bit(memcg, nid,
129 : lru_shrinker_id(lru));
130 11268 : nlru->nr_items++;
131 11268 : spin_unlock(&nlru->lock);
132 11268 : return true;
133 : }
134 0 : spin_unlock(&nlru->lock);
135 0 : return false;
136 : }
137 : EXPORT_SYMBOL_GPL(list_lru_add);
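
A caller is expected to embed a struct list_head in its object and keep that head empty whenever the object is off the LRU; list_lru_add() and list_lru_del() then report through their return value whether the state actually changed. The following is a minimal usage sketch only: struct demo_object, demo_lru and the demo_* helpers are hypothetical names for illustration, not part of this file.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/shrinker.h>
#include <linux/slab.h>
#include <linux/list_lru.h>

struct demo_object {
	struct list_head lru;	/* must be empty while the object is off the LRU */
	unsigned long busy;
};

static struct list_lru demo_lru;	/* set up in the init sketch further below */

static struct demo_object *demo_alloc(void)
{
	struct demo_object *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;
	INIT_LIST_HEAD(&obj->lru);	/* list_empty() is true, so the first add succeeds */
	return obj;
}

static void demo_make_reclaimable(struct demo_object *obj)
{
	/* false means the item was already on the LRU */
	if (!list_lru_add(&demo_lru, &obj->lru))
		pr_debug("demo object was already on the LRU\n");
}

static void demo_pin(struct demo_object *obj)
{
	/* false means the item was not on the LRU */
	if (!list_lru_del(&demo_lru, &obj->lru))
		pr_debug("demo object was not on the LRU\n");
}

Note that the node is derived from the page backing the item (page_to_nid(virt_to_page(item)) above), so objects placed on a list_lru must live in the directly mapped kernel address space, e.g. slab allocations.
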
138 :
139 2609 : bool list_lru_del(struct list_lru *lru, struct list_head *item)
140 : {
141 2609 : int nid = page_to_nid(virt_to_page(item));
142 2609 : struct list_lru_node *nlru = &lru->node[nid];
143 2609 : struct list_lru_one *l;
144 :
145 2609 : spin_lock(&nlru->lock);
146 2609 : if (!list_empty(item)) {
147 2609 : l = list_lru_from_kmem(nlru, item, NULL);
148 2609 : list_del_init(item);
149 2609 : l->nr_items--;
150 2609 : nlru->nr_items--;
151 2609 : spin_unlock(&nlru->lock);
152 2609 : return true;
153 : }
154 0 : spin_unlock(&nlru->lock);
155 0 : return false;
156 : }
157 : EXPORT_SYMBOL_GPL(list_lru_del);
158 :
159 0 : void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
160 : {
161 0 : list_del_init(item);
162 0 : list->nr_items--;
163 0 : }
164 : EXPORT_SYMBOL_GPL(list_lru_isolate);
165 :
166 559 : void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
167 : struct list_head *head)
168 : {
169 559 : list_move(item, head);
170 559 : list->nr_items--;
171 559 : }
172 : EXPORT_SYMBOL_GPL(list_lru_isolate_move);
173 :
174 0 : unsigned long list_lru_count_one(struct list_lru *lru,
175 : int nid, struct mem_cgroup *memcg)
176 : {
177 0 : struct list_lru_node *nlru = &lru->node[nid];
178 0 : struct list_lru_one *l;
179 0 : unsigned long count;
180 :
181 0 : rcu_read_lock();
182 0 : l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
183 0 : count = READ_ONCE(l->nr_items);
184 0 : rcu_read_unlock();
185 :
186 0 : return count;
187 : }
188 : EXPORT_SYMBOL_GPL(list_lru_count_one);
189 :
190 3 : unsigned long list_lru_count_node(struct list_lru *lru, int nid)
191 : {
192 3 : struct list_lru_node *nlru;
193 :
194 3 : nlru = &lru->node[nid];
195 3 : return nlru->nr_items;
196 : }
197 : EXPORT_SYMBOL_GPL(list_lru_count_node);
198 :
199 : static unsigned long
200 3 : __list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
201 : list_lru_walk_cb isolate, void *cb_arg,
202 : unsigned long *nr_to_walk)
203 : {
204 :
205 3 : struct list_lru_one *l;
206 3 : struct list_head *item, *n;
207 3 : unsigned long isolated = 0;
208 :
209 3 : l = list_lru_from_memcg_idx(nlru, memcg_idx);
210 : restart:
211 562 : list_for_each_safe(item, n, &l->list) {
212 559 : enum lru_status ret;
213 :
214 : /*
215 : * decrement nr_to_walk first so that we don't livelock if we
216 : * get stuck on large numbers of LRU_RETRY items
217 : */
218 559 : if (!*nr_to_walk)
219 : break;
220 559 : --*nr_to_walk;
221 :
222 559 : ret = isolate(item, l, &nlru->lock, cb_arg);
223 559 : switch (ret) {
224 0 : case LRU_REMOVED_RETRY:
225 0 : assert_spin_locked(&nlru->lock);
226 559 : fallthrough;
227 : case LRU_REMOVED:
228 559 : isolated++;
229 559 : nlru->nr_items--;
230 : /*
231 : * If the lru lock has been dropped, our list
232 : * traversal is now invalid and so we have to
233 : * restart from scratch.
234 : */
235 559 : if (ret == LRU_REMOVED_RETRY)
236 0 : goto restart;
237 : break;
238 0 : case LRU_ROTATE:
239 0 : list_move_tail(item, &l->list);
240 : break;
241 : case LRU_SKIP:
242 : break;
243 0 : case LRU_RETRY:
244 : /*
245 : * The lru lock has been dropped, our list traversal is
246 : * now invalid and so we have to restart from scratch.
247 : */
248 0 : assert_spin_locked(&nlru->lock);
249 0 : goto restart;
250 0 : default:
251 559 : BUG();
252 : }
253 : }
254 3 : return isolated;
255 : }
256 :
257 : unsigned long
258 3 : list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
259 : list_lru_walk_cb isolate, void *cb_arg,
260 : unsigned long *nr_to_walk)
261 : {
262 3 : struct list_lru_node *nlru = &lru->node[nid];
263 3 : unsigned long ret;
264 :
265 3 : spin_lock(&nlru->lock);
266 3 : ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
267 : nr_to_walk);
268 3 : spin_unlock(&nlru->lock);
269 3 : return ret;
270 : }
271 : EXPORT_SYMBOL_GPL(list_lru_walk_one);
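
The walk machinery drives a caller-supplied callback with the per-node lru lock held and interprets its enum lru_status return exactly as the switch in __list_lru_walk_one() shows. A hedged sketch of such a callback, reusing the hypothetical demo_object/demo_lru from the earlier sketch; demo_isolate() and demo_scan_node() are illustrative names only.

static enum lru_status demo_isolate(struct list_head *item,
				    struct list_lru_one *list,
				    spinlock_t *lock, void *cb_arg)
{
	struct list_head *freeable = cb_arg;
	struct demo_object *obj = container_of(item, struct demo_object, lru);

	if (obj->busy)
		return LRU_ROTATE;	/* keep it, but move it to the tail */

	/* take it off this list_lru_one and collect it for later disposal */
	list_lru_isolate_move(list, item, freeable);
	return LRU_REMOVED;		/* counted as isolated by the walk */
}

static unsigned long demo_scan_node(int nid, unsigned long nr_to_walk)
{
	LIST_HEAD(freeable);
	unsigned long isolated;

	isolated = list_lru_walk_one(&demo_lru, nid, NULL, demo_isolate,
				     &freeable, &nr_to_walk);
	/* free everything on "freeable" here, after the lru lock is dropped */
	return isolated;
}

LRU_REMOVED_RETRY and LRU_RETRY are for callbacks that had to drop the lru lock; in that case the walk restarts from the head of the list, which is why nr_to_walk is decremented before the callback runs.
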
272 :
273 : unsigned long
274 0 : list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
275 : list_lru_walk_cb isolate, void *cb_arg,
276 : unsigned long *nr_to_walk)
277 : {
278 0 : struct list_lru_node *nlru = &lru->node[nid];
279 0 : unsigned long ret;
280 :
281 0 : spin_lock_irq(&nlru->lock);
282 0 : ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
283 : nr_to_walk);
284 0 : spin_unlock_irq(&nlru->lock);
285 0 : return ret;
286 : }
287 :
288 3 : unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
289 : list_lru_walk_cb isolate, void *cb_arg,
290 : unsigned long *nr_to_walk)
291 : {
292 3 : long isolated = 0;
293 3 : int memcg_idx;
294 :
295 3 : isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
296 : nr_to_walk);
297 3 : if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
298 3 : for_each_memcg_cache_index(memcg_idx) {
299 : struct list_lru_node *nlru = &lru->node[nid];
300 :
301 : spin_lock(&nlru->lock);
302 : isolated += __list_lru_walk_one(nlru, memcg_idx,
303 : isolate, cb_arg,
304 : nr_to_walk);
305 : spin_unlock(&nlru->lock);
306 :
307 : if (*nr_to_walk <= 0)
308 : break;
309 : }
310 : }
311 3 : return isolated;
312 : }
313 : EXPORT_SYMBOL_GPL(list_lru_walk_node);
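
For shrinker-driven reclaim, the usual pattern pairs list_lru_shrink_count() and list_lru_shrink_walk() (thin wrappers in <linux/list_lru.h> around list_lru_count_one() and list_lru_walk_one()) with a struct shrinker. A sketch under that assumption, reusing the hypothetical demo_lru and demo_isolate() above; the demo_* names remain illustrative.

static unsigned long demo_count_objects(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return list_lru_shrink_count(&demo_lru, sc);
}

static unsigned long demo_scan_objects(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	LIST_HEAD(freeable);
	unsigned long freed;

	/* walks the list for sc->nid (and sc->memcg when memcg aware) */
	freed = list_lru_shrink_walk(&demo_lru, sc, demo_isolate, &freeable);
	/* dispose of the collected objects outside the lru lock */
	return freed;
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count_objects,
	.scan_objects	= demo_scan_objects,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE,
};
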
314 :
315 247 : static void init_one_lru(struct list_lru_one *l)
316 : {
317 247 : INIT_LIST_HEAD(&l->list);
318 247 : l->nr_items = 0;
319 : }
320 :
321 : #ifdef CONFIG_MEMCG_KMEM
322 : static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
323 : int begin, int end)
324 : {
325 : int i;
326 :
327 : for (i = begin; i < end; i++)
328 : kfree(memcg_lrus->lru[i]);
329 : }
330 :
331 : static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
332 : int begin, int end)
333 : {
334 : int i;
335 :
336 : for (i = begin; i < end; i++) {
337 : struct list_lru_one *l;
338 :
339 : l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
340 : if (!l)
341 : goto fail;
342 :
343 : init_one_lru(l);
344 : memcg_lrus->lru[i] = l;
345 : }
346 : return 0;
347 : fail:
348 : __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
349 : return -ENOMEM;
350 : }
351 :
352 : static int memcg_init_list_lru_node(struct list_lru_node *nlru)
353 : {
354 : struct list_lru_memcg *memcg_lrus;
355 : int size = memcg_nr_cache_ids;
356 :
357 : memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
358 : size * sizeof(void *), GFP_KERNEL);
359 : if (!memcg_lrus)
360 : return -ENOMEM;
361 :
362 : if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
363 : kvfree(memcg_lrus);
364 : return -ENOMEM;
365 : }
366 : RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
367 :
368 : return 0;
369 : }
370 :
371 : static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
372 : {
373 : struct list_lru_memcg *memcg_lrus;
374 : /*
375 : * This is called when the shrinker has already been unregistered,
376 : * and nobody can use it. So, there is no need to use kvfree_rcu().
377 : */
378 : memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
379 : __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
380 : kvfree(memcg_lrus);
381 : }
382 :
383 : static int memcg_update_list_lru_node(struct list_lru_node *nlru,
384 : int old_size, int new_size)
385 : {
386 : struct list_lru_memcg *old, *new;
387 :
388 : BUG_ON(old_size > new_size);
389 :
390 : old = rcu_dereference_protected(nlru->memcg_lrus,
391 : lockdep_is_held(&list_lrus_mutex));
392 : new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
393 : if (!new)
394 : return -ENOMEM;
395 :
396 : if (__memcg_init_list_lru_node(new, old_size, new_size)) {
397 : kvfree(new);
398 : return -ENOMEM;
399 : }
400 :
401 : memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
402 :
403 : /*
404 : * The locking below allows readers that hold nlru->lock to avoid taking
405 : * rcu_read_lock (see list_lru_from_memcg_idx).
406 : *
407 : * Since list_lru_{add,del} may be called under an IRQ-safe lock,
408 : * we have to use IRQ-safe primitives here to avoid deadlock.
409 : */
410 : spin_lock_irq(&nlru->lock);
411 : rcu_assign_pointer(nlru->memcg_lrus, new);
412 : spin_unlock_irq(&nlru->lock);
413 :
414 : kvfree_rcu(old, rcu);
415 : return 0;
416 : }
417 :
418 : static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
419 : int old_size, int new_size)
420 : {
421 : struct list_lru_memcg *memcg_lrus;
422 :
423 : memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
424 : lockdep_is_held(&list_lrus_mutex));
425 : /* do not bother shrinking the array back to the old size, because we
426 : * cannot handle allocation failures here */
427 : __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
428 : }
429 :
430 : static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
431 : {
432 : int i;
433 :
434 : lru->memcg_aware = memcg_aware;
435 :
436 : if (!memcg_aware)
437 : return 0;
438 :
439 : for_each_node(i) {
440 : if (memcg_init_list_lru_node(&lru->node[i]))
441 : goto fail;
442 : }
443 : return 0;
444 : fail:
445 : for (i = i - 1; i >= 0; i--) {
446 : if (!lru->node[i].memcg_lrus)
447 : continue;
448 : memcg_destroy_list_lru_node(&lru->node[i]);
449 : }
450 : return -ENOMEM;
451 : }
452 :
453 : static void memcg_destroy_list_lru(struct list_lru *lru)
454 : {
455 : int i;
456 :
457 : if (!list_lru_memcg_aware(lru))
458 : return;
459 :
460 : for_each_node(i)
461 : memcg_destroy_list_lru_node(&lru->node[i]);
462 : }
463 :
464 : static int memcg_update_list_lru(struct list_lru *lru,
465 : int old_size, int new_size)
466 : {
467 : int i;
468 :
469 : if (!list_lru_memcg_aware(lru))
470 : return 0;
471 :
472 : for_each_node(i) {
473 : if (memcg_update_list_lru_node(&lru->node[i],
474 : old_size, new_size))
475 : goto fail;
476 : }
477 : return 0;
478 : fail:
479 : for (i = i - 1; i >= 0; i--) {
480 : if (!lru->node[i].memcg_lrus)
481 : continue;
482 :
483 : memcg_cancel_update_list_lru_node(&lru->node[i],
484 : old_size, new_size);
485 : }
486 : return -ENOMEM;
487 : }
488 :
489 : static void memcg_cancel_update_list_lru(struct list_lru *lru,
490 : int old_size, int new_size)
491 : {
492 : int i;
493 :
494 : if (!list_lru_memcg_aware(lru))
495 : return;
496 :
497 : for_each_node(i)
498 : memcg_cancel_update_list_lru_node(&lru->node[i],
499 : old_size, new_size);
500 : }
501 :
502 : int memcg_update_all_list_lrus(int new_size)
503 : {
504 : int ret = 0;
505 : struct list_lru *lru;
506 : int old_size = memcg_nr_cache_ids;
507 :
508 : mutex_lock(&list_lrus_mutex);
509 : list_for_each_entry(lru, &list_lrus, list) {
510 : ret = memcg_update_list_lru(lru, old_size, new_size);
511 : if (ret)
512 : goto fail;
513 : }
514 : out:
515 : mutex_unlock(&list_lrus_mutex);
516 : return ret;
517 : fail:
518 : list_for_each_entry_continue_reverse(lru, &list_lrus, list)
519 : memcg_cancel_update_list_lru(lru, old_size, new_size);
520 : goto out;
521 : }
522 :
523 : static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
524 : int src_idx, struct mem_cgroup *dst_memcg)
525 : {
526 : struct list_lru_node *nlru = &lru->node[nid];
527 : int dst_idx = dst_memcg->kmemcg_id;
528 : struct list_lru_one *src, *dst;
529 :
530 : /*
531 : * Since list_lru_{add,del} may be called under an IRQ-safe lock,
532 : * we have to use IRQ-safe primitives here to avoid deadlock.
533 : */
534 : spin_lock_irq(&nlru->lock);
535 :
536 : src = list_lru_from_memcg_idx(nlru, src_idx);
537 : dst = list_lru_from_memcg_idx(nlru, dst_idx);
538 :
539 : list_splice_init(&src->list, &dst->list);
540 :
541 : if (src->nr_items) {
542 : dst->nr_items += src->nr_items;
543 : memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
544 : src->nr_items = 0;
545 : }
546 :
547 : spin_unlock_irq(&nlru->lock);
548 : }
549 :
550 : static void memcg_drain_list_lru(struct list_lru *lru,
551 : int src_idx, struct mem_cgroup *dst_memcg)
552 : {
553 : int i;
554 :
555 : if (!list_lru_memcg_aware(lru))
556 : return;
557 :
558 : for_each_node(i)
559 : memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
560 : }
561 :
562 : void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
563 : {
564 : struct list_lru *lru;
565 :
566 : mutex_lock(&list_lrus_mutex);
567 : list_for_each_entry(lru, &list_lrus, list)
568 : memcg_drain_list_lru(lru, src_idx, dst_memcg);
569 : mutex_unlock(&list_lrus_mutex);
570 : }
571 : #else
572 : static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
573 : {
574 : return 0;
575 : }
576 :
577 198 : static void memcg_destroy_list_lru(struct list_lru *lru)
578 : {
579 198 : }
580 : #endif /* CONFIG_MEMCG_KMEM */
581 :
582 247 : int __list_lru_init(struct list_lru *lru, bool memcg_aware,
583 : struct lock_class_key *key, struct shrinker *shrinker)
584 : {
585 247 : int i;
586 247 : int err = -ENOMEM;
587 :
588 : #ifdef CONFIG_MEMCG_KMEM
589 : if (shrinker)
590 : lru->shrinker_id = shrinker->id;
591 : else
592 : lru->shrinker_id = -1;
593 : #endif
594 247 : memcg_get_cache_ids();
595 :
596 247 : lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
597 247 : if (!lru->node)
598 0 : goto out;
599 :
600 494 : for_each_node(i) {
601 247 : spin_lock_init(&lru->node[i].lock);
602 247 : if (key)
603 1 : lockdep_set_class(&lru->node[i].lock, key);
604 247 : init_one_lru(&lru->node[i].lru);
605 : }
606 :
607 247 : err = memcg_init_list_lru(lru, memcg_aware);
608 : if (err) {
609 : kfree(lru->node);
610 : /* Do this so a list_lru_destroy() doesn't crash: */
611 : lru->node = NULL;
612 : goto out;
613 : }
614 :
615 247 : list_lru_register(lru);
616 247 : out:
617 247 : memcg_put_cache_ids();
618 247 : return err;
619 : }
620 : EXPORT_SYMBOL_GPL(__list_lru_init);
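
From a caller's point of view, set-up and tear-down normally go through the list_lru_init()/list_lru_init_memcg() wrappers around __list_lru_init() declared in <linux/list_lru.h>. A final sketch for the hypothetical demo_lru and demo_shrinker above; the single-argument register_shrinker() form is assumed to match kernels of this vintage.

static int __init demo_init(void)
{
	int err;

	/* not memcg aware; use list_lru_init_memcg(&demo_lru, &demo_shrinker) otherwise */
	err = list_lru_init(&demo_lru);
	if (err)
		return err;

	err = register_shrinker(&demo_shrinker);
	if (err)
		list_lru_destroy(&demo_lru);
	return err;
}

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
	list_lru_destroy(&demo_lru);	/* a no-op if lru->node was never allocated */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
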
621 :
622 198 : void list_lru_destroy(struct list_lru *lru)
623 : {
624 : /* Already destroyed or not yet initialized? */
625 198 : if (!lru->node)
626 : return;
627 :
628 198 : memcg_get_cache_ids();
629 :
630 198 : list_lru_unregister(lru);
631 :
632 198 : memcg_destroy_list_lru(lru);
633 198 : kfree(lru->node);
634 198 : lru->node = NULL;
635 :
636 : #ifdef CONFIG_MEMCG_KMEM
637 : lru->shrinker_id = -1;
638 : #endif
639 198 : memcg_put_cache_ids();
640 : }
641 : EXPORT_SYMBOL_GPL(list_lru_destroy);
|